# Copyright 2024, Battelle Energy Alliance, LLC ALL RIGHTS RESERVED
import logging
import spacy
import pandas as pd
from .nlp_utils import generatePatternList
[docs]
class CreatePatterns(object):
    """Build a list of entity patterns from a CSV file of entity names.

    On construction the CSV file is read, an nlp pipeline is resolved
    (either the one supplied or a freshly loaded ``en_core_web_lg``), and
    the patterns are generated by ``generatePatternList``. The result is
    available through :meth:`getPatterns`.
    """

    def __init__(self, filename, entLabel, entID=None, nlp=None, *args, **kwargs):
        """Read the entities and generate their patterns.

        Args:
            filename: path of the CSV file holding the entities.
            entLabel: label passed to ``generatePatternList`` for every
                pattern.
            entID: id passed to ``generatePatternList``; falls back to
                ``entLabel`` when None.
            nlp: pipeline object passed to ``generatePatternList``; when
                None, ``spacy.load("en_core_web_lg", exclude=[])`` is used.
            *args: accepted for call compatibility; unused here.
            **kwargs: accepted for call compatibility; unused here.
        """
        self.filename = filename
        # generatePatterns() reads self.label, so it must be stored before
        # the patterns are built at the end of this constructor.
        self.label = entLabel
        self.id = entLabel if entID is None else entID
        self.entities = self.readFile()
        if nlp is None:
            self.nlp = spacy.load("en_core_web_lg", exclude=[])
        else:
            self.nlp = nlp
        self.patterns = self.generatePatterns()

    def readFile(self):
        """Return the values of the CSV file as a flat Python list.

        Returns:
            list: every cell of the parsed file, flattened row by row.
        """
        # The file is expected to hold a single column of entities.
        # NOTE(review): pandas infers a header by default, so the first row
        # of the file is treated as the column name and is not returned. If
        # the files truly carry no column name, the first entity is dropped
        # and header=None would be needed -- confirm against the data files.
        entList = pd.read_csv(self.filename).values.ravel().tolist()
        return entList

    def generatePatterns(self):
        """Generate the patterns for the entities read from the file.

        Returns:
            The value returned by ``generatePatternList`` when called with
            this instance's entities, label, id and nlp pipeline, using
            ``attr="LEMMA"``.
        """
        patterns = generatePatternList(
            self.entities,
            label=self.label,
            id=self.id,
            nlp=self.nlp,
            attr="LEMMA",
        )
        return patterns

    def getPatterns(self):
        """Return the patterns generated at construction time."""
        return self.patterns