# Source code for src.dackar.utils.mbse.customMBSEparser

# Copyright 2024, Battelle Energy Alliance, LLC  ALL RIGHTS RESERVED

"""
Created on February, 2024

@author: mandd
"""

# External Imports
import logging
import os

import pandas as pd

# Module-level logger used by the MBSE parser to report the outcome of each
# consistency check. DEBUG level lets every message issued by this module
# through to the attached handlers.
logger = logging.getLogger("my logger")
logger.setLevel(logging.DEBUG)

# Stream handler (stderr by default) so that messages are visible on the console.
c_handler = logging.StreamHandler()
logger.addHandler(c_handler)
class customMBSEobject(object):
    """
      Class designed to process a custom MBSE model from file.

      The model is described by two .csv files: one listing the nodes
      (columns: label, ID, type) and one listing the edges (columns:
      sourceNodeId, targetNodeId, type, medium). All cell values are
      lower-cased when parsed; column names are left untouched.
    """

    def __init__(self, nodesFilename, edgesFilename):
        """
          Initialization method for the custom MBSE model class.
          It parses both files, checks their consistency and writes the parsed
          tables next to the originals with a '_kg' suffix before the extension.
          Args:
            nodesFilename: file, file in .csv format containing all nodes
            edgesFilename: file, file in .csv format containing all edges
          Returns:
            None
          Raises:
            IOError: if the node or edge file fails one of the consistency checks
        """
        self.nodesFilename = nodesFilename
        self.edgesFilename = edgesFilename
        self.listIDs = []

        self.allowedNodeTypes = ['entity']
        self.allowedEdgeTypes = ['link','composition','support'] # to be developed: 'opm_instance'

        self.allowedNodeCols = ['label','ID','type']
        self.allowed_edge_cols = ['sourceNodeId','targetNodeId','type','medium']

        self.parseFiles()
        self.checkModel()

        nodesFileKg = self._kgFilename(self.nodesFilename)
        edgesFileKg = self._kgFilename(self.edgesFilename)
        self.printOnFiles(nodesFileKg, edgesFileKg)

    @staticmethod
    def _kgFilename(filename):
        """
          Helper that inserts '_kg' between the root and the extension of a file name.
          Args:
            filename: str, path of the original file (e.g., './data/nodes.csv')
          Returns:
            kgFilename: str, path of the derived file (e.g., './data/nodes_kg.csv')
        """
        # splitext only splits on the last dot of the last path component, so
        # dots in directory names (e.g. './' or '../') do not corrupt the result
        root, ext = os.path.splitext(filename)
        return root + '_kg' + ext

    @staticmethod
    def _isMissing(value):
        """
          Helper that tells whether a parsed cell holds no value.
          Args:
            value: object, cell value read from the node or edge table
          Returns:
            missing: bool, True if value is null or the string 'nan'
        """
        # Cells are converted with astype(str) while parsing, which can turn a
        # missing value into the literal string 'nan'; both forms are treated
        # as "not specified"
        return bool(pd.isnull(value)) or value == 'nan'

    @staticmethod
    def _readLowercased(filename):
        """
          Helper that reads a .csv file as strings, drops fully empty rows and
          lower-cases every cell.
          Args:
            filename: file, .csv file to be read
          Returns:
            frame: pandas.DataFrame, parsed table
        """
        frame = pd.read_csv(filename, sep=',', skip_blank_lines=True, dtype=str)
        frame.dropna(how='all', inplace=True)
        return frame.apply(lambda col: col.astype(str).str.lower())

    @staticmethod
    def _appendRow(frame, row):
        """
          Helper that appends a row to a table without overwriting existing rows.
          Args:
            frame: pandas.DataFrame, table to be extended in place
            row: dict, values of the new row keyed by column name
          Returns:
            None
        """
        label = len(frame)
        # Dropping empty rows while parsing leaves gaps in the index, so the
        # label len(frame) may already be in use; move past the largest label
        if label in frame.index:
            label = frame.index.max() + 1
        frame.loc[label] = row

    def _collectIDs(self):
        """
          Helper that lists the node IDs actually specified in the node table.
          Args:
            None
          Returns:
            ids: list, node IDs with missing entries removed
        """
        return [nodeId for nodeId in self.nodesDf['ID'].to_list() if not self._isMissing(nodeId)]

    def checkModel(self):
        """
          Method designed to check model consistency
          Args:
            None
          Returns:
            None
          Raises:
            IOError: if the node or edge table fails one of the checks
        """
        self.checkNodes()
        self.checkEdges()

    def parseFiles(self):
        """
          Method designed to parse the node and edge files
          Args:
            None
          Returns:
            None
        """
        # parse nodes
        self.nodesDf = self._readLowercased(self.nodesFilename)
        self.listIDs = self._collectIDs()

        # parse edges
        self.edgesDf = self._readLowercased(self.edgesFilename)

    def checkNodes(self):
        """
          Method designed to check the node file
          Args:
            None
          Returns:
            None
          Raises:
            IOError: if the set of columns is wrong, if a row specifies neither
                     type nor ID, or if a row has a type that is not allowed
        """
        logger.info('- Check node file -')

        # Check all columns are present
        cols = self.nodesDf.columns.tolist()
        if set(cols) != set(self.allowedNodeCols):
            raise IOError('Node file structure check - Error: wrong set of provided columns ' + str(cols) + ' (allowed: label, ID, type)')
        logger.info('Node file structure check - Pass')

        # Check for duplicate IDs (restricted to the ID column: two nodes that
        # share an ID but differ in label are still duplicates)
        duplicateIDs = self.nodesDf.duplicated(subset=['ID'])
        if self.nodesDf[duplicateIDs].empty:
            logger.info("List of node IDs check - Pass")
        else:
            logger.info("List of node IDs check - Error: duplicate IDs were found:")
            logger.info(self.nodesDf[duplicateIDs])

        # Check for structure of each row
        logger.info("Entity check...")
        allowedTypes = set(self.allowedNodeTypes)
        for index, row in self.nodesDf.iterrows():
            # Tested before the type check, otherwise a missing type would
            # always be reported as a not-allowed type
            if self._isMissing(row['type']) and self._isMissing(row['ID']):
                raise IOError('Entity of row ' + str(index) + ' in node file: Error - neither type nor ID have been specified')
            if row['type'] not in allowedTypes:
                raise IOError('Type of row ' + str(index) + ' in node file is not allowed. Allowed types: ' +str(self.allowedNodeTypes))
        logger.info("Entities check: Pass")

    def checkEdges(self):
        """
          Method designed to check the edge file
          Args:
            None
          Returns:
            None
          Raises:
            IOError: if the set of columns is wrong, if an edge refers to an ID
                     not defined in the node file, if an edge misses its source
                     or target, if an edge type is not allowed, if a link has no
                     medium, if a support edge has a medium, or if a node is
                     never mentioned by any edge
        """
        logger.info('- Check edge file -')

        # Check all columns are present
        cols = self.edgesDf.columns.tolist()
        if set(cols) != set(self.allowed_edge_cols):
            raise IOError('Edge file structure check - Error: wrong set of provided columns (allowed: sourceNodeId,targetNodeId,type,medium)')
        logger.info('Edge file structure check - Pass')

        # Check for duplicate edges
        duplicateEdges = self.edgesDf[['sourceNodeId','targetNodeId']].duplicated()
        if self.edgesDf[duplicateEdges].empty:
            logger.info("List of edges check - Pass")
        else:
            logger.info("List of edges check - Error: duplicate edges were found:")
            logger.info(self.edgesDf[duplicateEdges])

        # Check IDs in edge file are defined in node file. Missing endpoints
        # are skipped here; the row-by-row check below reports them explicitly
        knownIDs = set(self.listIDs)
        sourceNodeId_list = self.edgesDf['sourceNodeId'].to_list()
        targetNodeId_list = self.edgesDf['targetNodeId'].to_list()
        for endpoints in (sourceNodeId_list, targetNodeId_list):
            unknown = {nodeId for nodeId in endpoints if not self._isMissing(nodeId)} - knownIDs
            if unknown:
                raise IOError('Error - Edge file: not recognized entities: ' + str(unknown))

        # Check for structure of each row
        logger.info("Edges check...")
        allowedTypes = set(self.allowedEdgeTypes)
        for index, row in self.edgesDf.iterrows():
            if self._isMissing(row['sourceNodeId']) or self._isMissing(row['targetNodeId']):
                logger.info(row)
                raise IOError('Edge ' + str(index) + ' in edge file: Error - both sourceNodeId and targetNodeId need to be specified')

            if row['type'] not in allowedTypes:
                logger.info(row)
                raise IOError('Type of row ' + str(index) + ' in edge file is not allowed. Allowed types: ' +str(self.allowedEdgeTypes))

            hasMedium = not self._isMissing(row['medium'])
            if row['type']=='link' and not hasMedium:
                logger.info(row)
                raise IOError('Edge ' + str(index) + ' in edge file: Error - link does not have a medium specified')

            if row['type']=='support' and hasMedium:
                logger.info(row['medium'])
                logger.info(type(row['medium']))
                raise IOError('Edge ' + str(index) + ' in edge file: Error - support does not support medium keyword; specified:' +str(row['medium']))

        # Check that entities in the node file have been mentioned in edge file
        entities_edge_list = sourceNodeId_list + targetNodeId_list
        diff3 = knownIDs - set(entities_edge_list)
        if diff3:
            raise IOError('Error - Node file: these entities in the node file were not mentioned in the edge file: ' + str(diff3))

        logger.info("Edges check: Pass")

        # Provide info of outgoing only nodes
        outgoingSet = set(sourceNodeId_list) - set(targetNodeId_list)
        logger.info('List of outgoing only nodes:' + str(outgoingSet))

        # Provide info of ingoing only nodes
        ingoingSet = set(targetNodeId_list) - set(sourceNodeId_list)
        logger.info('List of ingoing only nodes:' + str(ingoingSet))

    def returnIDs(self):
        """
          Method designed to return list of IDs included in the model
          Args:
            None
          Returns:
            self.listIDs, list, list of IDs specified in the MBSE model
        """
        return self.listIDs

    def addNodesEdges(self, new_node_dict, new_edge_dicts):
        """
          Method designed to add one node and its edges to the model
          Args:
            new_node_dict: dict, values of the new node keyed by node column name
            new_edge_dicts: list, list of dicts (keyed by edge column name), one per new edge
          Returns:
            None
        """
        self._appendRow(self.nodesDf, new_node_dict)
        for edge in new_edge_dicts:
            self._appendRow(self.edgesDf, edge)
        # Keep the list of IDs aligned with the extended node table
        self.listIDs = self._collectIDs()

    def printOnFiles(self,nodes_file,edges_file):
        """
          Method designed to print on file the set of nodes and edges
          Args:
            nodes_file: file, destination of the node table in .csv format
            edges_file: file, destination of the edge table in .csv format
          Returns:
            None
        """
        self.nodesDf.to_csv(nodes_file, index=False)
        self.edgesDf.to_csv(edges_file, index=False)