# Source code for src.dackar.utils.utils

# Copyright 2024, Battelle Energy Alliance, LLC  ALL RIGHTS RESERVED
"""
Created on February, 2024

@author: wangc, mandd
"""
import re
import logging
import toml
import os
import pathlib

# Module-level logger registered under the "DACKAR.utils" name
logger = logging.getLogger("DACKAR.utils")
def getOnlyWords(s):
    """
    Return the text with every dash- or digit-bearing token removed.

    The input is cut into tokens made of letters, digits, "-", "#", "/" and
    "%". Tokens holding at least one "-" or digit (things like T8, A-b, etc)
    are dropped; all other tokens and the separators between them are kept
    in their original order.

    Args:
        s: string
    Returns:
        string with only the words
    """
    # The capturing group makes re.split return the tokens alongside the
    # separator text, so the surviving pieces can simply be glued back together.
    pieces = re.split(r"([-A-Za-z0-9#/%]+)", s)
    hasDashOrDigit = re.compile(r"[-0-9]+").search
    return "".join(piece for piece in pieces if hasDashOrDigit(piece) is None)
def getShortAcronym(s):
    """
    Remove things like h/s, s/d, etc.

    The text is split around every "letter/letter" group that starts at a
    word boundary and is immediately followed by whitespace. Each resulting
    piece that is exactly three characters long and contains a "/" is
    collected as an acronym; every other piece is kept in the output text.

    Args:
        s: string
    Returns:
        tuple: (ns, acronym) where ``ns`` is the string with the short
        acronyms removed and ``acronym`` is the list of removed pieces in
        order of appearance
    """
    pieces = re.split(r"(\b[A-Za-z]/[A-Za-z])(?=\s)", s)
    kept = []
    acronym = []
    # Single pass: classify each piece once instead of re-scanning the acronym
    # list for every piece (the classification depends only on the piece text).
    for piece in pieces:
        if len(piece) == 3 and "/" in piece:
            acronym.append(piece)
        else:
            kept.append(piece)
    return "".join(kept), acronym
def readToml(filePath):
    """Read toml formatted file

    When the file has an ``nlp`` section, every value of its ``files`` table
    is joined onto the directory that contains ``filePath`` (via
    ``os.path.join``), so relative entries are resolved against the location
    of the configuration file.

    Args:
        filePath (str): the path to the file

    Returns:
        dict: dictionary of file content, with the ``nlp.files`` values
        rewritten as described above

    Raises:
        KeyError: if an ``nlp`` section is present without a ``files`` table
    """
    # TOML documents are UTF-8 by specification; do not depend on the locale
    # default encoding of the platform.
    with open(filePath, 'r', encoding='utf-8') as file:
        dataDict = toml.load(file)
    # Directory of the configuration file, used as the base for its file entries
    path = pathlib.Path(filePath).parent
    if 'nlp' in dataDict:
        nlpFiles = dataDict['nlp']['files']
        for f in nlpFiles:
            nlpFiles[f] = os.path.join(path, nlpFiles[f])
    # if 'neo4j' in dataDict:
    #   for node in dataDict['neo4j']['node']:
    #     node['file'] = os.path.join(path, node['file'])
    #   for edge in dataDict['neo4j']['edge']:
    #     edge['file'] = os.path.join(path, edge['file'])
    return dataDict
def writeToFile(filePath, content):
    """Write content into file

    The file is created if missing and truncated if it already exists.

    Args:
        filePath (str): file path
        content (str): content that will be written
    """
    # Explicit UTF-8 keeps the written bytes identical across platforms and
    # avoids UnicodeEncodeError for text outside the locale's code page.
    with open(filePath, 'w', encoding='utf-8') as file:
        file.write(content)