# Copyright 2024, Battelle Energy Alliance, LLC ALL RIGHTS RESERVED
"""
Created on February, 2024
@author: wangc, mandd
"""
import re
import logging
import toml
import os
import pathlib
# Module-level logger obtained under the name 'DACKAR.utils'
logger = logging.getLogger('DACKAR.utils')
def getOnlyWords(s):
    """
    Strip tokens that contain a hyphen or a digit (things like T8, A-b, etc.) from a string

    Args:
        s: string to filter

    Returns:
        string in which every token containing "-" or a digit has been removed; all
        remaining text (including the separators between tokens) is kept unchanged
    """
    # Tokens are runs of "-", "A-Z", "a-z", "0-9", "#", "/" and "%". The capturing group
    # makes re.split return the tokens together with the text found between them.
    pieces = re.split(r"([-A-Za-z0-9#/%]+)", s)
    kept = []
    for piece in pieces:
        # discard any piece that holds a "-" or a number
        if re.search(r"[-0-9]+", piece) is None:
            kept.append(piece)
    return "".join(kept)
def getShortAcronym(s):
    """
    Remove short slash acronyms (things like h/s, s/d, etc.) from a string

    A short acronym is a single letter, a "/", and a single letter, starting at a word
    boundary and followed by whitespace or by the end of the string.

    Args:
        s: string

    Returns:
        tuple (ns, acronym):
            ns: string, the input with the short acronyms removed
            acronym: list of the removed acronyms, in order of appearance
    """
    # With one capturing group, re.split alternates [text, match, text, match, ..., text]:
    # even positions hold the surrounding text and odd positions hold the captured acronyms.
    # Selecting by position avoids misclassifying unrelated three-character pieces that
    # merely contain a "/" (e.g. " / ").
    pieces = re.split(r"(\b[A-Za-z]/[A-Za-z])(?=\s|$)", s)
    acronym = pieces[1::2]
    ns = "".join(pieces[0::2])
    return ns, acronym
def readToml(filePath):
    """Read toml formatted file

    When the file has an ``nlp`` table with a ``files`` sub-table, every value in that
    sub-table is joined onto the directory that contains ``filePath`` (via os.path.join,
    so a value that is already absolute is left as is).

    Args:
        filePath (str): the path to the file

    Returns:
        dict: dictionary of file content
    """
    baseDir = pathlib.Path(filePath).parent
    # TOML documents are UTF-8 by specification; do not depend on the platform default encoding
    with open(filePath, 'r', encoding='utf-8') as file:
        dataDict = toml.load(file)
    # Both the 'nlp' table and its 'files' sub-table are treated as optional
    nlpFiles = dataDict.get('nlp', {}).get('files', {})
    for f in nlpFiles:
        nlpFiles[f] = os.path.join(baseDir, nlpFiles[f])
    # if 'neo4j' in dataDict:
    #   for node in dataDict['neo4j']['node']:
    #     node['file'] = os.path.join(path, node['file'])
    #   for edge in dataDict['neo4j']['edge']:
    #     edge['file'] = os.path.join(path, edge['file'])
    return dataDict
def writeToFile(filePath, content):
    """Write content into file

    The file is opened in text write mode, so existing content is replaced.

    Args:
        filePath (str): file path
        content (str): content that will be written
    """
    with open(filePath, mode='w') as outStream:
        outStream.write(content)