Source code for src.dackar.utils.nlp.CreatePatterns

# Copyright 2024, Battelle Energy Alliance, LLC  ALL RIGHTS RESERVED

import logging
import spacy
import pandas as pd

from .nlp_utils import generatePatternList


class CreatePatterns(object):
    """Build entity patterns from a CSV file listing entity names.

    On construction the entities are read from ``filename`` and handed to
    ``generatePatternList`` together with the entity label, the entity id and
    a spaCy pipeline. The result is kept in ``self.patterns`` and can be
    retrieved with :meth:`getPatterns`.
    """

    def __init__(self, filename, entLabel, entID=None, nlp=None, *args, **kwargs):
        """Read the entity file and generate the patterns.

        Args:
            filename: location of the CSV file, forwarded as-is to
                ``pandas.read_csv``.
            entLabel: label forwarded to ``generatePatternList``.
            entID: id forwarded to ``generatePatternList``; when None the
                value of ``entLabel`` is used instead.
            nlp: spaCy pipeline to use; when None, ``en_core_web_lg`` is
                loaded with no component excluded.
            *args: accepted but not used by this class.
            **kwargs: accepted but not used by this class.
        """
        self.filename = filename
        self.label = entLabel
        self.id = entLabel if entID is None else entID
        # The file is read before the (potentially slow) model load, so an
        # unreadable file fails first.
        self.entities = self.readFile()
        self.nlp = spacy.load("en_core_web_lg", exclude=[]) if nlp is None else nlp
        self.patterns = self.generatePatterns()

    def readFile(self):
        """Load the entities stored in ``self.filename``.

        The CSV file is assumed to contain one column and no column name.

        Returns:
            list: every cell of the file, flattened row by row.
        """
        # header=None is required for a file without a column name: with the
        # default header inference pandas consumes the first entity as the
        # column header and silently drops it from the returned list.
        frame = pd.read_csv(self.filename, header=None)
        return frame.values.ravel().tolist()

    def generatePatterns(self):
        """Generate the patterns for the loaded entities.

        Delegates to ``generatePatternList`` with ``attr="LEMMA"``, using the
        entities, label, id and spaCy pipeline stored on the instance.

        Returns:
            Whatever ``generatePatternList`` returns (presumably a list of
            spaCy pattern definitions -- confirm against ``nlp_utils``).
        """
        return generatePatternList(
            self.entities,
            label=self.label,
            id=self.id,
            nlp=self.nlp,
            attr="LEMMA",
        )

    def getPatterns(self):
        """Return the patterns generated at construction time.

        Returns:
            The object stored in ``self.patterns``.
        """
        return self.patterns