Source code for src.dackar.pipelines.LocationEntity

# Copyright 2024, Battelle Energy Alliance, LLC  ALL RIGHTS RESERVED

import logging

import pandas as pd
from spacy.language import Language

from ..utils.nlp.nlp_utils import generatePatternList

logger = logging.getLogger(__name__)

@Language.factory("location_entity", default_config={"patterns": None})
def create_location_component(nlp, name, patterns):
  return LocationEntity(nlp, patterns=patterns)
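# spaCy resolves the registered name 'location_entity' to this factory when
# ``nlp.add_pipe('location_entity', config={'patterns': ...})`` is called and
# forwards the optional ``patterns`` config entry to the LocationEntity class
# below.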

class LocationEntity(object):
  """
    How to use it:

    .. code-block:: python

      from LocationEntity import LocationEntity

      nlp = spacy.load("en_core_web_sm")
      patterns = {'label': 'location', 'pattern': [{'LOWER': 'close'}], 'id': 'location'}
      lmatcher = LocationEntity(nlp, patterns)
      doc = nlp("It is close to 5pm.")
      updatedDoc = lmatcher(doc)

    or:

    .. code-block:: python

      nlp.add_pipe('location_entity', config={"patterns": {'label': 'location', 'pattern': [{'LOWER': 'close'}], 'id': 'location'}})
      newDoc = nlp(doc.text)
  """

  def __init__(self, nlp, patterns=None, callback=None):
    """
      Args:

        nlp: spacy.language.Language, the loaded spaCy pipeline this component is attached to
        patterns: list/dict, patterns for Location Entity
        callback: callable, optional, currently unused
    """
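    # When no patterns are supplied, fall back to the bundled keyword CSV:
    # its 'proximity', 'up', and 'down' columns seed lemma-based patterns for
    # the shared entity ruler (see the hard-coded ``filename`` below).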
    self.name = 'location_entity'
    if patterns is None:
      # update to use config file instead
      # filename = nlpConfig['files']['location_file']
      filename = '~/projects/raven/plugins/SR2ML/src/nlp/data/location_keywords.csv'
      entLists = pd.read_csv(filename, header=0)
      proxList = entLists['proximity'].dropna().values.ravel().tolist()
      upList = entLists['up'].dropna().values.ravel().tolist()
      downList = entLists['down'].dropna().values.ravel().tolist()
      patterns = []
      proxPatterns = generatePatternList(proxList, label='location_proximity', id='location', nlp=nlp, attr="LEMMA")
      patterns.extend(proxPatterns)
      upPatterns = generatePatternList(upList, label='location_up', id='location', nlp=nlp, attr="LEMMA")
      patterns.extend(upPatterns)
      downPatterns = generatePatternList(downList, label='location_down', id='location', nlp=nlp, attr="LEMMA")
      patterns.extend(downPatterns)
    if not isinstance(patterns, list) and isinstance(patterns, dict):
      patterns = [patterns]
    # do we need to pop out other pipes?
    if not nlp.has_pipe('entity_ruler'):
      nlp.add_pipe('entity_ruler')
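    # The shared 'entity_ruler' pipe consumes spaCy EntityRuler pattern dicts,
    # e.g. {'label': 'location', 'pattern': [{'LOWER': 'close'}], 'id': 'location'};
    # registering them here lets the ruler tag location mentions during nlp().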
    self.entityRuler = nlp.get_pipe('entity_ruler')
    self.entityRuler.add_patterns(patterns)

  def __call__(self, doc):
    """
      Args:

        doc: spacy.tokens.doc.Doc, the processed document using nlp pipelines

      Returns:

        doc: spacy.tokens.doc.Doc, the document with location entities tagged by the entity ruler
    """
    doc = self.entityRuler(doc)
    return doc
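

if __name__ == '__main__':
  # Minimal usage sketch: assumes the ``en_core_web_sm`` model is installed
  # and that this module is run with package context (e.g.
  # ``python -m src.dackar.pipelines.LocationEntity``) so the relative imports
  # above resolve. The pattern and sentence are illustrative only.
  import spacy

  nlp = spacy.load('en_core_web_sm')
  nlp.add_pipe('location_entity',
               config={'patterns': {'label': 'location_proximity',
                                    'pattern': [{'LEMMA': 'near'}],
                                    'id': 'location'}})
  doc = nlp('The valve is installed near the pump.')
  for ent in doc.ents:
    print(ent.text, ent.label_, ent.ent_id_)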