# Source code for src.dackar.utils.utils

# Copyright 2024, Battelle Energy Alliance, LLC  ALL RIGHTS RESERVED
"""
Created on February, 2024

@author: wangc, mandd
"""
import re
import logging
import toml
import os
import pathlib


[docs]
# Module-level logger for this file, obtained under the name 'DACKAR.utils'
logger = logging.getLogger('DACKAR.utils')




[docs]
def getOnlyWords(s):
  """
    Keep only the plain words of a string (drops tokens like T8, A-b, etc)

    Args:
      s: string, the text to filter

    Returns:
      string, the input text without the tokens that contain a digit or a "-";
      every other character, including the whitespace around a dropped token, is kept
  """
  def _keepOrDrop(match):
    """
      Return the matched token unchanged, or an empty string when it holds a digit or "-"
    """
    token = match.group(0)
    return "" if re.search(r"[-0-9]+", token) else token

  # a token is a maximal run of "-", "A-Z", "a-z", "0-9", "#", "/" and "%";
  # text between tokens never matches the pattern and is therefore left untouched
  return re.sub(r"([-A-Za-z0-9#/%]+)", _keepOrDrop, s)



[docs]
def getShortAcronym(s):
  """
    Separate short slash acronyms (things like h/s, s/d, etc.) from the rest of the text

    Args:
      s: string, the text to process

    Returns:
      ns: string, the text with the short acronyms removed (surrounding whitespace is kept)
      acronym: list, the removed acronyms in their order of appearance
  """
  # the capturing group makes re.split return every matched acronym as its own piece;
  # an acronym is only matched when it starts at a word boundary and is followed by whitespace
  pieces = re.split(r"(\b[A-Za-z]/[A-Za-z])(?=\s)", s)
  shortForm = re.compile(r"[A-Za-z]/[A-Za-z]")
  acronym = []
  kept = []
  for piece in pieces:
    # a piece is an acronym only when it is exactly letter/letter. Merely checking for a "/"
    # and a length of 3 would also pull separator fragments such as " / " or "1/2" out of the text.
    # A string that consists of nothing but the acronym (e.g. "h/s") is still recognized here.
    if shortForm.fullmatch(piece):
      acronym.append(piece)
    else:
      kept.append(piece)
  ns = "".join(kept)
  return ns, acronym



[docs]
def readToml(filePath):
  """Read toml formatted file

  Every entry of the optional ['nlp']['files'] table is joined onto the
  directory that contains the toml file (using os.path.join).

  Args:
      filePath (str): the path to the file

  Returns:
      dict: dictionary of file content
  """
  baseDir = pathlib.Path(filePath).parent
  # TOML documents are UTF-8 by specification, so do not rely on the platform default encoding
  with open(filePath, 'r', encoding='utf-8') as file:
    dataDict = toml.load(file)
  if 'nlp' in dataDict:
    # an [nlp] table without a 'files' entry is accepted and simply has nothing to resolve
    nlpFiles = dataDict['nlp'].get('files', {})
    for f in nlpFiles:
      nlpFiles[f] = os.path.join(baseDir, nlpFiles[f])
  # if 'neo4j' in dataDict:
  #   for node in dataDict['neo4j']['node']:
  #     node['file'] = os.path.join(path, node['file'])
  #   for edge in dataDict['neo4j']['edge']:
  #     edge['file'] = os.path.join(path, edge['file'])

  return dataDict



[docs]
def writeToFile(filePath, content):
  """Save text into a file opened in write mode

  Opening in 'w' mode creates the file when needed and truncates any
  existing content before writing.

  Args:
      filePath (str): path of the file to write
      content (str): text that will be stored in the file
  """
  with open(filePath, mode='w') as outFile:
    outFile.write(content)