# Source code for src.dackar.pipelines.LocationEntity

# Copyright 2024, Battelle Energy Alliance, LLC  ALL RIGHTS RESERVED

from spacy.language import Language
import pandas as pd

from ..utils.nlp.nlp_utils import generatePatternList
from ..config import nlpConfig

import logging

# Module-level logger named after this module
logger = logging.getLogger(__name__)


@Language.factory("location_entity", default_config={"patterns": None})
def create_location_component(nlp, name, patterns):
  """
  Factory registered with spaCy under the name "location_entity"

  Args:

    nlp: spacy.language.Language, the pipeline object, forwarded to LocationEntity
    name: str, the component name supplied to the factory (not forwarded to LocationEntity)
    patterns: list/dict/None, patterns forwarded to LocationEntity (None by default config)

  Returns:

    LocationEntity, the constructed pipeline component
  """
  return LocationEntity(nlp, patterns=patterns)




class LocationEntity(object):
  """
    Location Entity Recognition class

    How to use it:

    .. code-block:: python

      import spacy
      from LocationEntity import LocationEntity
      nlp = spacy.load("en_core_web_sm")
      patterns = {'label': 'location', 'pattern': [{'LOWER': 'follow'}], 'id': 'location'}
      cmatcher = LocationEntity(nlp, patterns)
      doc = nlp("It is close to 5pm.")
      updatedDoc = cmatcher(doc)

    or:

    .. code-block:: python

      nlp.add_pipe('location_entity', config={"patterns": {'label': 'location', 'pattern': [{'LOWER': 'follow'}], 'id': 'location'}})
      newDoc = nlp(doc.text)
  """

  def __init__(self, nlp, patterns=None, callback=None):
    """
    Args:

      nlp: spacy nlp object, used to look up (and, if missing, add) the 'entity_ruler' pipe
      patterns: list/dict, patterns for Location Entity. A single dict is wrapped
        into a one-element list. When None, patterns are generated from the
        'proximity', 'up' and 'down' columns of the CSV file configured at
        nlpConfig['files']['location_file']
      callback: not used by this class; kept so existing callers keep working
    """
    self.name = 'location_entity'

    if patterns is None:
      # Build the default patterns from the location file named in the config
      filename = nlpConfig['files']['location_file']
      entLists = pd.read_csv(filename, header=0)
      patterns = []
      # (csv column, entity label) pairs, processed in this order
      columnLabels = (
        ('proximity', 'location_proximity'),
        ('up', 'location_up'),
        ('down', 'location_down'),
      )
      for column, label in columnLabels:
        # empty cells are dropped before generating patterns
        terms = entLists[column].dropna().values.ravel().tolist()
        patterns.extend(generatePatternList(terms, label=label, id='location', nlp=nlp, attr="LEMMA"))
    elif isinstance(patterns, dict):
      # a single pattern dict is accepted for convenience
      patterns = [patterns]

    # do we need to pop out other pipes?
    if not nlp.has_pipe('entity_ruler'):
      nlp.add_pipe('entity_ruler')

    # The ruler is fetched from the pipeline, so the patterns are added to the
    # pipeline's own 'entity_ruler' component
    self.entityRuler = nlp.get_pipe('entity_ruler')
    self.entityRuler.add_patterns(patterns)

  def __call__(self, doc):
    """
    Args:

        doc: spacy.tokens.doc.Doc, the processed document using nlp pipelines

    Returns:

        the result of applying the entity ruler to doc
    """
    return self.entityRuler(doc)