# Copyright 2024, Battelle Energy Alliance, LLC ALL RIGHTS RESERVED
"""
Created on February, 2024
@author: mandd
"""
# External Imports
import logging
import os

import pandas as pd
# Module-level logger shared by the checks in this file.
# A stream handler is attached and the level is set to DEBUG so that all
# messages emitted by the checks below (logged at INFO level) are shown.
logger = logging.getLogger("my logger")
c_handler = logging.StreamHandler()
logger.addHandler(c_handler)
logger.setLevel(logging.DEBUG)
class customMBSEobject(object):
    """
    Class designed to process a custom MBSE model from file.

    The model is described by two .csv files: one listing the nodes (columns
    label, ID, type) and one listing the edges (columns sourceNodeId,
    targetNodeId, type, medium). On construction both files are parsed,
    checked, and written back to files carrying a '_kg' suffix.
    """

    def __init__(self, nodesFilename, edgesFilename):
        """
        Initialization method for the custom MBSE model class
        Args:
            nodesFilename: str, path of the file in .csv format containing all nodes
            edgesFilename: str, path of the file in .csv format containing all edges
        Returns:
            None
        Raises:
            IOError: if the node or the edge file does not pass the checks
        """
        self.nodesFilename = nodesFilename
        self.edgesFilename = edgesFilename

        self.allowedNodeTypes = ['entity']
        self.allowedEdgeTypes = ['link', 'composition', 'support']  # to be developed: 'opm_instance'

        self.allowedNodeCols = ['label', 'ID', 'type']
        self.allowed_edge_cols = ['sourceNodeId', 'targetNodeId', 'type', 'medium']

        self.parseFiles()
        self.checkModel()

        nodesFileKg = self._kgFilename(self.nodesFilename)
        edgesFileKg = self._kgFilename(self.edgesFilename)
        self.printOnFiles(nodesFileKg, edgesFileKg)

    @staticmethod
    def _kgFilename(filename):
        """
        Method designed to build the output filename by inserting '_kg' before the extension
        Args:
            filename: str, original filename (e.g., 'nodes.csv')
        Returns:
            str, filename with '_kg' suffix (e.g., 'nodes_kg.csv')
        """
        # splitext only splits on the last dot of the last path component, so
        # './data/nodes.csv' and 'model.v2.csv' are handled correctly
        root, ext = os.path.splitext(filename)
        return root + '_kg' + ext

    @staticmethod
    def _readLowercaseCsv(filename):
        """
        Method designed to read a .csv file as strings and lower-case its content
        Args:
            filename: str, path of the .csv file
        Returns:
            df: pandas.DataFrame, parsed content; empty cells are kept as missing values
        """
        df = pd.read_csv(filename, sep=',', skip_blank_lines=True, dtype=str)
        df.dropna(how='all', inplace=True)
        # Lower-case only actual strings: missing cells must stay missing so that
        # the pd.isnull checks and dropna() calls used elsewhere can detect them
        return df.apply(lambda col: col.map(lambda val: val.lower() if isinstance(val, str) else val))

    @staticmethod
    def _nextRowLabel(df):
        """
        Method designed to return an index label not yet used by the dataframe
        Args:
            df: pandas.DataFrame, dataframe with an integer index
        Returns:
            int, label to be used for a new row
        """
        # len(df) is not a safe label: rows dropped while parsing leave gaps in
        # the index, so len(df) can coincide with the label of an existing row
        if len(df.index) == 0:
            return 0
        return int(df.index.max()) + 1

    def checkModel(self):
        """
        Method designed to check model consistency
        Args:
            None
        Returns:
            None
        Raises:
            IOError: if the node or the edge file does not pass the checks
        """
        self.checkNodes()
        self.checkEdges()

    def parseFiles(self):
        """
        Method designed to parse the node and edge files
        Args:
            None
        Returns:
            None
        """
        # parse nodes
        self.nodesDf = self._readLowercaseCsv(self.nodesFilename)
        self.listIDs = self.nodesDf['ID'].dropna().to_list()
        # parse edges
        self.edgesDf = self._readLowercaseCsv(self.edgesFilename)

    def checkNodes(self):
        """
        Method designed to check the node file
        Args:
            None
        Returns:
            None
        Raises:
            IOError: if the columns are not the allowed ones, or a row has a
                     missing ID or a type which is not allowed
        """
        logger.info('- Check node file -')

        # Check all columns are present
        cols = self.nodesDf.columns.tolist()
        if set(cols) != set(self.allowedNodeCols):
            raise IOError('Node file structure check - Error: wrong set of provided columns ' + str(cols) + ' (allowed: label, ID, type)')
        logger.info('Node file structure check - Pass')

        # Check for duplicate IDs (on the ID column only; missing IDs are
        # reported by the row check below)
        ids = self.nodesDf['ID']
        duplicateIDs = ids.notna() & ids.duplicated()
        if not duplicateIDs.any():
            logger.info("List of node IDs check - Pass")
        else:
            logger.info("List of node IDs check - Error: duplicate IDs were found:")
            logger.info(self.nodesDf[duplicateIDs])

        # Check for structure of each row
        logger.info("Entity check...")
        allowedTypes = set(self.allowedNodeTypes)
        for index, row in self.nodesDf.iterrows():
            typeMissing = pd.isnull(row['type'])
            idMissing = pd.isnull(row['ID'])
            # Tested first: the type check below would otherwise mask this message
            if typeMissing and idMissing:
                raise IOError('Entity of row ' + str(index) + ' in node file: Error - neither type nor ID have been specified')
            if typeMissing or row['type'] not in allowedTypes:
                raise IOError('Type of row ' + str(index) + ' in node file is not allowed. Allowed types: ' + str(self.allowedNodeTypes))
            if idMissing:
                raise IOError('Entity of row ' + str(index) + ' in node file: Error - ID has not been specified')
        logger.info("Entities check: Pass")

    def checkEdges(self):
        """
        Method designed to check the edge file
        Args:
            None
        Returns:
            None
        Raises:
            IOError: if the columns are not the allowed ones, an edge refers to
                     an ID not listed in the node file, a row is not well
                     formed, or a node is never mentioned in the edge file
        """
        logger.info('- Check edge file -')

        # Check all columns are present
        cols = self.edgesDf.columns.tolist()
        if set(cols) != set(self.allowed_edge_cols):
            raise IOError('Edge file structure check - Error: wrong set of provided columns (allowed: sourceNodeId,targetNodeId,type,medium)')
        logger.info('Edge file structure check - Pass')

        # Check for duplicate edges
        duplicateEdges = self.edgesDf[['sourceNodeId', 'targetNodeId']].duplicated()
        if not duplicateEdges.any():
            logger.info("List of edges check - Pass")
        else:
            logger.info("List of edges check - Error: duplicate edges were found:")
            logger.info(self.edgesDf[duplicateEdges])

        # Check IDs in edge file are defined in node file. Missing endpoints are
        # left out here; they are reported by the row check below
        knownIDs = set(self.listIDs)
        sourceNodeId_list = self.edgesDf['sourceNodeId'].dropna().to_list()
        diff1 = set(sourceNodeId_list) - knownIDs
        if diff1:
            raise IOError('Error - Edge file: not recognized entities: ' + str(diff1))
        targetNodeId_list = self.edgesDf['targetNodeId'].dropna().to_list()
        diff2 = set(targetNodeId_list) - knownIDs
        if diff2:
            raise IOError('Error - Edge file: not recognized entities: ' + str(diff2))

        # Check for structure of each row
        logger.info("Edges check...")
        allowedTypes = set(self.allowedEdgeTypes)
        for index, row in self.edgesDf.iterrows():
            if pd.isnull(row['sourceNodeId']) or pd.isnull(row['targetNodeId']):
                logger.info(row)
                raise IOError('Edge ' + str(index) + ' in edge file: Error - both sourceNodeId and targetNodeId need to be specified')
            if pd.isnull(row['type']) or row['type'] not in allowedTypes:
                logger.info(row)
                raise IOError('Type of row ' + str(index) + ' in edge file is not allowed. Allowed types: ' + str(self.allowedEdgeTypes))
            mediumMissing = pd.isnull(row['medium'])
            if row['type'] == 'link' and mediumMissing:
                logger.info(row)
                raise IOError('Edge ' + str(index) + ' in edge file: Error - link does not have a medium specified')
            if row['type'] == 'support' and not mediumMissing:
                logger.info(row['medium'])
                logger.info(type(row['medium']))
                raise IOError('Edge ' + str(index) + ' in edge file: Error - support does not support medium keyword; specified:' + str(row['medium']))

        # check that entities in the node file have been mentioned in edge file
        entities_edge_list = sourceNodeId_list + targetNodeId_list
        diff3 = knownIDs - set(entities_edge_list)
        if diff3:
            raise IOError('Error - Node file: these entities in the node file were not mentioned in the edge file: ' + str(diff3))
        logger.info("Edges check: Pass")

        # Provide info of outgoing only nodes
        outgoingSet = set(sourceNodeId_list) - set(targetNodeId_list)
        logger.info('List of outgoing only nodes:%s', outgoingSet)

        # Provide info of ingoing only nodes
        ingoingSet = set(targetNodeId_list) - set(sourceNodeId_list)
        logger.info('List of ingoing only nodes:%s', ingoingSet)

    def returnIDs(self):
        """
        Method designed to return list of IDs included in the model
        Args:
            None
        Returns:
            self.listIDs, list, list of IDs specified in the MBSE model
        """
        return self.listIDs

    def addNodesEdges(self, new_node_dict, new_edge_dicts):
        """
        Method designed to add one node and a set of edges to the model
        Args:
            new_node_dict: dict, values of the new node keyed by node column name
            new_edge_dicts: list, list of dict, one per new edge, keyed by edge column name
        Returns:
            None
        """
        # Values are stored as provided: no lower-casing and no checks are
        # performed here (checkModel can be called afterwards)
        self.nodesDf.loc[self._nextRowLabel(self.nodesDf)] = new_node_dict
        for edge in new_edge_dicts:
            self.edgesDf.loc[self._nextRowLabel(self.edgesDf)] = edge
        self.listIDs = self.nodesDf['ID'].dropna().to_list()

    def printOnFiles(self, nodes_file, edges_file):
        """
        Method designed to print on file the set of nodes and edges
        Args:
            nodes_file: str, path of the .csv file where nodes are written
            edges_file: str, path of the .csv file where edges are written
        Returns:
            None
        """
        self.nodesDf.to_csv(nodes_file, index=False)
        self.edgesDf.to_csv(edges_file, index=False)