# Source code for src.dackar.utils.mbse.customMBSEparser

# Copyright 2024, Battelle Energy Alliance, LLC  ALL RIGHTS RESERVED

"""
Created on February, 2024

@author: mandd
"""

# External Imports
import logging
import os

import pandas as pd


# Module-level logger used by every method of the parser below.
# NOTE(review): the name "my logger" is a process-wide key in the logging
# registry; it is kept unchanged so that any code looking the logger up by
# that name keeps working.
logger = logging.getLogger("my logger")

# Handler that emits the records to a stream (StreamHandler() with no
# argument writes to sys.stderr).
c_handler = logging.StreamHandler()

logger.addHandler(c_handler)
# DEBUG is the most verbose level: every record sent to this logger is emitted.
logger.setLevel(logging.DEBUG)


class customMBSEobject(object):
    """
        Class designed to process a custom MBSE model from file.

        The model is described by two .csv files: a node file (columns
        label, ID, type) and an edge file (columns sourceNodeId, targetNodeId,
        type, medium). All cell values are lower-cased when parsed; empty
        cells are kept as missing values (NaN).
    """
    def __init__(self, nodesFilename, edgesFilename):
        """
        Initialization method for the custom MBSE model class.
        It parses both files, checks them and writes the parsed model to two
        new files whose names are the input names with '_kg' inserted before
        the file extension.

        Args:

            nodesFilename: file, file in .csv format containing all nodes
            edgesFilename: file, file in .csv format containing all edges

        Returns:

            None

        Raises:

            IOError: if the node or the edge file does not pass the checks
        """
        self.nodesFilename = nodesFilename
        self.edgesFilename = edgesFilename
        self.listIDs = []

        self.allowedNodeTypes = ['entity']
        self.allowedEdgeTypes = ['link','composition','support'] # to be developed: 'opm_instance'

        self.allowedNodeCols = ['label','ID','type']
        self.allowed_edge_cols = ['sourceNodeId','targetNodeId','type','medium']

        self.parseFiles()
        self.checkNodes()
        self.checkEdges()

        nodesFileKg = self._kgFilename(self.nodesFilename)
        edgesFileKg = self._kgFilename(self.edgesFilename)

        self.printOnFiles(nodesFileKg, edgesFileKg)

    @staticmethod
    def _kgFilename(filename):
        """
        Method designed to build the name of the output file associated to an input file

        Args:

            filename: str, name (or path) of the input file

        Returns:

            str, filename with '_kg' inserted right before its extension
        """
        # splitext only splits on the last dot of the last path component, so
        # paths such as './data/nodes.csv' or 'a.b.csv' are handled correctly
        # and a name without extension does not raise.
        root, ext = os.path.splitext(filename)
        return root + '_kg' + ext

    @staticmethod
    def _lowercase(df):
        """
        Method designed to lower-case all the values of a dataframe while keeping missing values

        Args:

            df: pandas.DataFrame, dataframe to be processed

        Returns:

            pandas.DataFrame, copy of df with lower-cased string values
        """
        # where() keeps the original (missing) value wherever isna() is True;
        # a plain astype(str) would turn NaN into the string 'nan' and defeat
        # every later pd.isnull / dropna check.
        return df.apply(lambda col: col.where(col.isna(), col.astype(str).str.lower()))

    def checkModel(self):
        """
        Method designed to check model consistency

        Args:

            None

        Returns:

            None

        Raises:

            IOError: if the node or the edge data does not pass the checks
        """
        self.checkNodes()
        self.checkEdges()

    def parseFiles(self):
        """
        Method designed to parse the node and edge files.
        It populates self.nodesDf, self.edgesDf and self.listIDs.

        Args:

            None

        Returns:

            None
        """
        # parse nodes
        self.nodesDf = pd.read_csv(self.nodesFilename, sep=',', skip_blank_lines=True, dtype=str)
        self.nodesDf.dropna(how='all', inplace=True)
        self.nodesDf = self._lowercase(self.nodesDf)

        # A missing 'ID' column is reported by checkNodes with a dedicated
        # error message, hence no KeyError is raised here.
        if 'ID' in self.nodesDf.columns:
            self.listIDs = self.nodesDf['ID'].dropna().to_list()
        else:
            self.listIDs = []

        # parse edges
        self.edgesDf = pd.read_csv(self.edgesFilename, sep=',', skip_blank_lines=True, dtype=str)
        self.edgesDf.dropna(how='all', inplace=True)
        self.edgesDf = self._lowercase(self.edgesDf)

    def checkNodes(self):
        """
        Method designed to check the node file: set of columns, duplicate IDs
        (only reported in the log) and content of each row.

        Args:

            None

        Returns:

            None

        Raises:

            IOError: if the set of columns is wrong, if a row has neither type
                     nor ID, or if the type of a row is not allowed
        """
        logger.info('- Check node file -')
        # Check all columns are present
        cols = self.nodesDf.columns.tolist()
        if set(cols) != set(self.allowedNodeCols):
            raise IOError('Node file structure check - Error: wrong set of provided columns ' + str(cols) + ' (allowed: label, ID, type)')
        logger.info('Node file structure check - Pass')

        # Check for duplicate IDs (missing IDs are not duplicates of each other)
        ids = self.nodesDf['ID']
        duplicateIDs = ids.notna() & ids.duplicated()

        if not duplicateIDs.any():
            logger.info("List of node IDs check - Pass")
        else:
            logger.warning("List of node IDs check - Error: duplicate IDs were found:")
            logger.warning(self.nodesDf[duplicateIDs])

        # Check for structure of each row
        logger.info("Entity check...")
        allowedTypes = set(self.allowedNodeTypes)
        for index, row in self.nodesDf.iterrows():
            # This test comes first: a missing type would otherwise always be
            # reported as a "type not allowed" error.
            if pd.isnull(row['type']) and pd.isnull(row['ID']):
                raise IOError('Entity of row ' + str(index) + ' in node file: Error - neither type nor ID have been specified')

            if row['type'] not in allowedTypes:
                raise IOError('Type of row ' + str(index) + ' in node file is not allowed. Allowed types: ' +str(self.allowedNodeTypes))
        logger.info("Entities check: Pass")

    def checkEdges(self):
        """
        Method designed to check the edge file: set of columns, duplicate
        edges (only reported in the log), consistency with the node IDs and
        content of each row.

        Args:

            None

        Returns:

            None

        Raises:

            IOError: if the set of columns is wrong, if an edge refers to an
                     ID not listed in the node file, if a row is not well
                     formed, or if a node is never mentioned by any edge
        """
        logger.info('- Check edge file -')
        # Check all columns are present
        cols = self.edgesDf.columns.tolist()
        if set(cols) != set(self.allowed_edge_cols):
            raise IOError('Edge file structure check - Error: wrong set of provided columns (allowed: sourceNodeId,targetNodeId,type,medium)')
        logger.info('Edge file structure check - Pass')

        # Check for duplicate edges
        duplicateEdges = self.edgesDf[['sourceNodeId','targetNodeId']].duplicated()

        if not duplicateEdges.any():
            logger.info("List of edges check - Pass")
        else:
            logger.warning("List of edges check - Error: duplicate edges were found:")
            logger.warning(self.edgesDf[duplicateEdges])

        # Check IDs in edge file are defined in node file.
        # Missing IDs are dropped here: they are reported by the row check
        # below with a more specific message.
        knownIDs = set(self.listIDs)
        sourceNodeId_list = self.edgesDf['sourceNodeId'].dropna().to_list()
        diff1 = set(sourceNodeId_list) - knownIDs
        if diff1:
            raise IOError('Error - Edge file: not recognized entities: ' + str(diff1))

        targetNodeId_list = self.edgesDf['targetNodeId'].dropna().to_list()
        diff2 = set(targetNodeId_list) - knownIDs
        if diff2:
            raise IOError('Error - Edge file: not recognized entities: ' + str(diff2))

        # Check for structure of each row
        logger.info("Edges check...")
        allowedTypes = set(self.allowedEdgeTypes)
        for index, row in self.edgesDf.iterrows():
            if pd.isnull(row['sourceNodeId']) or pd.isnull(row['targetNodeId']):
                logger.info(row)
                raise IOError('Edge ' + str(index) + ' in edge file: Error - both sourceNodeId and targetNodeId need to be specified')

            if row['type'] not in allowedTypes:
                logger.info(row)
                raise IOError('Type of row ' + str(index) + ' in edge file is not allowed. Allowed types: ' +str(self.allowedEdgeTypes))

            if row['type']=='link' and pd.isnull(row['medium']):
                logger.info(row)
                raise IOError('Edge ' + str(index) + ' in edge file: Error - link does not have a medium specified')

            if row['type']=='support' and pd.notnull(row['medium']):
                logger.info(row)
                raise IOError('Edge ' + str(index) + ' in edge file: Error - support does not support medium keyword; specified:' +str(row['medium']))

        # check that entities in the node file have been mentioned in edge file
        entities_edge_list = sourceNodeId_list + targetNodeId_list
        diff3 = knownIDs - set(entities_edge_list)
        if diff3:
            raise IOError('Error - Node file: these entities in the node file were not mentioned in the edge file: ' + str(diff3))
        logger.info("Edges check: Pass")

        # Provide info of outgoing only nodes
        outgoingSet = set(sourceNodeId_list) - set(targetNodeId_list)
        logger.info('List of outgoing only nodes:%s', outgoingSet)
        # Provide info of ingoing only nodes
        ingoingSet = set(targetNodeId_list) - set(sourceNodeId_list)
        logger.info('List of ingoing only nodes:%s', ingoingSet)

    def returnIDs(self):
        """
        Method designed to return list of IDs included in the model

        Args:

            None

        Returns:

            self.listIDs, list, list of IDs specified in the MBSE model
        """
        return self.listIDs

    def addNodesEdges(self, new_node_dict, new_edge_dicts):
        """
        Method designed to add one node and a set of edges to the model.
        The list of IDs is updated accordingly; no check is performed here
        (see checkModel).

        Args:

            new_node_dict: dict, row to be appended to the node dataframe
            new_edge_dicts: list, list of rows (dict) to be appended to the edge dataframe

        Returns:

            None
        """
        # Rows dropped while parsing leave gaps in the index; without a
        # contiguous index, .loc[len(df)] could overwrite an existing row
        # instead of appending a new one.
        self.nodesDf = self.nodesDf.reset_index(drop=True)
        self.nodesDf.loc[len(self.nodesDf)] = new_node_dict

        self.edgesDf = self.edgesDf.reset_index(drop=True)
        for edge in new_edge_dicts:
            self.edgesDf.loc[len(self.edgesDf)] = edge

        self.listIDs = self.nodesDf['ID'].dropna().to_list()

    def printOnFiles(self,nodes_file,edges_file):
        """
        Method designed to print on file the set of nodes and edges

        Args:

            nodes_file: file, .csv file where the node dataframe is written
            edges_file: file, .csv file where the edge dataframe is written

        Returns:

            None
        """
        self.nodesDf.to_csv(nodes_file, index=False)
        self.edgesDf.to_csv(edges_file, index=False)