# Copyright 2024, Battelle Energy Alliance, LLC ALL RIGHTS RESERVED
"""
Created on October, 2022
@author: wangc, mandd
"""
from .SpellChecker import SpellChecker
from .Preprocessing import Preprocessing
import pandas as pd
class AbbrExpander(object):
    """
    Class to expand abbreviations

    The abbreviation table is loaded once at construction time; the text is
    then run through a fixed preprocessing pipeline and handed to the spell
    checker object, which performs the actual expansion.
    """

    def __init__(self, abbreviationsFilename, checkerType='autocorrect', abbrType='mixed'):
        """
        Abbreviation expander constructor

        Args:
            abbreviationsFilename: str, filename of abbreviations data (read
                with pandas.read_excel)
            checkerType: str, type for spell checker class, i.e., 'autocorrect',
                'pyspellchecker', and 'contextual spellcheck', default is
                'autocorrect'
            abbrType: str, type of abbreviation method ('spellcheck', 'hard',
                'mixed') that is employed to determine which words are
                abbreviations that need to be expanded

                * spellcheck: the spellchecker is used to identify words that
                  are not recognized
                * hard: the abbreviations are searched for directly in the
                  provided sentence
                * mixed: a "hard" search is performed first, followed by a
                  "spellcheck" search

        Return:
            None
        """
        self.abbrType = abbrType
        self.checkerType = checkerType
        # Abbreviation table as loaded from the spreadsheet (a pandas object).
        self.abbrList = pd.read_excel(abbreviationsFilename)
        # Names of the preprocessing steps applied to every text before expansion.
        self.preprocessorList = ['hyphenated_words',
                                 'whitespace',
                                 'numerize']
        # The empty dict is the second positional argument of Preprocessing;
        # presumably per-step options, none of which are set here - TODO confirm.
        self.preprocess = Preprocessing(self.preprocessorList, {})
        self.checker = SpellChecker(checker=self.checkerType)
        # Lookup structure derived from the table by the checker; its exact
        # layout is defined by SpellChecker.generateAbbrDict.
        self.abbrDict = self.checker.generateAbbrDict(self.abbrList)

    def abbrProcess(self, text, splitToList=False):
        """
        Expands the abbreviations in text

        The text is preprocessed and lower-cased before it is passed to the
        checker, so the returned text is based on the lower-cased input.

        Args:
            text: string, the text to expand
            splitToList: bool, True to split the text on '.' and expand each
                piece separately before re-joining the pieces with '. ',
                default is False

        Returns:
            expandedText: string, the text with abbreviations expanded
        """
        text = self.preprocess(text)
        if not splitToList:
            return self.checker.handleAbbreviationsDict(self.abbrDict, text.lower(), type=self.abbrType)

        # NOTE(review): newlines are removed, not replaced by a space, so words
        # separated only by a line break are concatenated. Kept as is because
        # callers may rely on the current output - confirm whether intended.
        text = text.replace("\n", "")
        # NOTE(review): every '.' is treated as a sentence boundary, including
        # decimal points, and a trailing '.' yields a final empty piece.
        pieces = [piece.strip() for piece in text.split('.')]
        expandedPieces = [
            self.checker.handleAbbreviationsDict(self.abbrDict, piece.lower(), type=self.abbrType)
            for piece in pieces
        ]
        return '. '.join(expandedPieces)