Source code for bahamas.cccg

# Copyright 2025, Battelle Energy Alliance, LLC  ALL RIGHTS RESERVED

"""
Created on September 7, 2025
@author: wangc, chene
"""
import pandas as pd
import warnings
import sys
import os
import copy
warnings.filterwarnings('ignore')

import logging


[docs]
logger = logging.getLogger('BAHAMAS.CCCG')


########### internal functions


[docs]
def check_valid(src_arr):
    """Check if group is empty

    Args:
        src_arr (list or pd.DataFrame): data to check

    Returns:
        bool: True if the group is not empty, else False
    """
    if len(src_arr) == 0:
        return False

    return True



[docs]
def clean_string(s):
    """Remove "None" and "nan" from str that are separated by ";"

    Args:
        s (str): string to clean

    Returns:
        str: cleaned string
    """
    if pd.isna(s):
        return s
    return ';'.join([item for item in s.split(';') if item != 'None' and item != 'nan'])



[docs]
def merge_multi_lists(*arg):
    """Merge multiple pd.DataFrame CCCG data and remove duplicates

    Args:
        arg (list): list of pd.DataFrame CCCG data

    Returns:
        list: merged CCCG data without duplications
    """
    master = list()
    for pd_multi in arg:
        master.extend(pd_multi)
    master = drop_dup(master)
    return master



[docs]
def drop_dup(list_pd):
    """Drop duplicated CCCG data

    Args:
        list_pd (list): list of pd.DataFrame (CCCG data)

    Returns:
        list: list of pd.DataFrame (CCCG data) without duplications
    """
    filtered_list_pd = list()

    flag = False

    for pd_tmp in list_pd:
        if len(filtered_list_pd) == 0:
            filtered_list_pd.append(list_pd[0])
            continue

        for c in filtered_list_pd:
            # Drop index comparison
            c = c.reset_index(drop=True)
            pd_tmp = pd_tmp.reset_index(drop=True)
            if pd_tmp.equals(c):
                flag = True
        if not flag:
            filtered_list_pd.append(pd_tmp)
        flag = False

    return filtered_list_pd



[docs]
def unique_cells(src_arr):
    """Identify the unique inputs/designs/functions for the given coupling factor (i.e., Input Function and Design)

    Args:
        src_arr (pd.DataFrame): expanded coupling factor data for single coupling factor

    Returns:
        list: list of unique inputs/designs/functions among all components
    """
    arr_cols = pd.concat([src_arr[col] for col in src_arr], ignore_index=True)
    arr_unique = arr_cols.unique()
    arr_unique = [cell for cell in arr_unique if pd.notna(cell)]

    return arr_unique



[docs]
def match_CCCG_on(src_arr, col_name):
    r"""Get a list of CCCGs based on single coupling factor, meaning the CCCGs are grouped because
    they have at least one shared variable of the given coupling factor

    Args:
        src_arr (pd.DataFrame): Component table with expanded coupling factors
        col_name (str): coupling factor, Function\_, Input\_ or Design\_

    Returns:
        list: list of CCCGs based on single coupling factor, arranged in the order \*_1, \*_2, \*_3, etc. where \* represents the coupling factor
    """
    CCCG_arr = list()

    # Get columns that start with the coupling factor name, i.e., Function_, Input_, and Design_
    arr_cols = [col for col in src_arr.columns if col.startswith(col_name)]

    # Get unique values for single coupling factor among all components
    tmp = unique_cells(src_arr[arr_cols])

    # Filter the DataFrame to find rows where any col_name column contains the search string
    for t in tmp:
        matching_rows = src_arr[arr_cols].apply(lambda row: row == t).any(axis=1)

        # Get the rows that match the search string
        result_df = src_arr[matching_rows]
        if len(result_df) == 0:
            continue

        # Result is a CCCG
        CCCG_arr.append(result_df)

    CCCG_arr = drop_dup(CCCG_arr)

    return CCCG_arr



[docs]
def match_CCCG_list_on(list_pd, col_name):
    r"""Identify CCCGs that are also have the "col_name" coupling factor. (This function is used for two or more shared coupling factors.)

    Args:
        list_pd (list): list of pd.DataFrame of CCCGs
        col_name (str): coupling factor, Function\_, Input\_ or Design\_

    Returns:
        list: CCCGs with the additional coupling factor ("col_name")
    """
    CCCG_arr = list()

    for pd_tmp in list_pd:
        tmp = match_CCCG_on(pd_tmp, col_name)
        CCCG_arr.extend(tmp)

    CCCG_arr = drop_dup(CCCG_arr)

    return CCCG_arr



[docs]
def match_multi_on(src_arr, col_name):
    """Extract a CCCG based on col_name

    Args:
        src_arr (pd.DataFrame): pd.DataFrame of CCCGs
        col_name (str): coupling factor

    Returns:
        list: extracted the CCCGs based on the given column name
    """
    # Partially working, only matches on single dependency. Multiple dependencies matching is not implemented.
    CCCG_arr = list()

    # Get columns that start with the coupling factor name
    arr_cols = [col for col in src_arr.columns if col.startswith(col_name)]
    arr_rows = src_arr[arr_cols]

    # Filter the DataFrame to find rows where there are multiple dependencies
    for col_name in arr_cols:
        # Check if multiple dependencies
        matching_rows = src_arr[col_name].notna()

        tmp = src_arr[matching_rows].reset_index(drop=True)

        # Result is a CCCG
        for i in range(len(tmp)):
            row = tmp.loc[[i]]
            CCCG_arr.append(row)

    CCCG_arr = drop_dup(CCCG_arr)

    return CCCG_arr



[docs]
def match_multi_list_on(list_pd, col_name):
    """Extract the CCCGs from a list of pandas.DataFrame

    Args:
        list_pd (list): list of pd.DataFrame
        col_name (str): coupling factor name

    Returns:
        list: list of extracted CCCGs
    """
    CCCG_arr = list()

    for pd_tmp in list_pd:
        tmp = match_multi_on(pd_tmp, col_name)

        for t in tmp:
            CCCG_arr.append(t)

    CCCG_arr = drop_dup(CCCG_arr)

    return CCCG_arr



[docs]
class CCCG(object):
    """
    Compute CCCGs for the given list of components of diversity and redundancy system
    """

    def __init__(self, file):

[docs]
        self._sys_diagram = file


[docs]
        self._cccg_final = []


[docs]
        self._cccg_function = []


[docs]
        self._cccg_input = []


[docs]
        self._cccg_design = []


[docs]
        self._cccg_single = []


[docs]
        self._cccg_function_input = []


[docs]
        self._cccg_function_design = []


[docs]
        self._cccg_input_function = []


[docs]
        self._cccg_input_design = []


[docs]
        self._cccg_design_input = []


[docs]
        self._cccg_design_function = []


[docs]
        self._cccg_double = []


[docs]
        self._cccg_function_input_design = []


[docs]
        self._cccg_function_design_input = []


[docs]
        self._cccg_input_design_function = []


[docs]
        self._cccg_input_function_design = []


[docs]
        self._cccg_design_input_function = []


[docs]
        self._cccg_design_function_input = []


[docs]
        self._cccg_triple = []


[docs]
        self._function_all = []


[docs]
        self._input_all = []


[docs]
        self._design_all = []

        # self._output_file = "CCCGs.csv"


[docs]
    def generate(self, config=None):
        """Generate CCCGs based on three coupling factors (i.e., Function, Input, and Design)

        Args:
            config (dict, optional): config file for output control. Defaults to None.
        """
        logger.info("Generating")
        # Read data file into Pandas
        df_pd = pd.read_csv(self._sys_diagram)

        # Expand Function_Config
        func_big = df_pd["Function_Config"].str.split('; ', expand=True)
        func_big.columns = [f'Function_{i+1}' for i in range(func_big.shape[1])]

        df_pd_tmp = pd.concat([df_pd.drop(columns=['Function_Config']), func_big], axis=1)

        # Expand Input_Config
        inpt_big = df_pd["Input_Config"].str.split('; ', expand=True)
        inpt_big.columns = [f'Input_{i+1}' for i in range(inpt_big.shape[1])]

        df_pd_tmp = pd.concat([df_pd_tmp.drop(columns=['Input_Config']), inpt_big], axis=1)

        # Expand Design Config
        dsgn_big = df_pd["Design_Config"].str.split('; ', expand=True)
        dsgn_big.columns = [f'Design_{i+1}' for i in range(dsgn_big.shape[1])]

        df_pd_tmp = pd.concat([df_pd_tmp.drop(columns=['Design_Config']), dsgn_big], axis=1)

        # Get CCCG on single coupling factor
        self._cccg_function = match_CCCG_on(df_pd_tmp, 'Function_') # list[first variable pd.DataFrame, second variable pd.DataFrame, ...]
        self._cccg_design   = match_CCCG_on(df_pd_tmp, 'Design_')
        self._cccg_input    = match_CCCG_on(df_pd_tmp, 'Input_')

        # Merge all CCCGs on single coupling factor and drop duplicates
        self._cccg_single   = merge_multi_lists(self._cccg_function,
                                        self._cccg_design,
                                        self._cccg_input
                                        )
        # remove single entry since there is no other component that has shared coupling factors
        self._cccg_single = [x for x in self._cccg_single if len(x) != 1]

        # Get CCCG on two coupling factor
        self._cccg_function_input  = match_CCCG_list_on(self._cccg_function, 'Input_')
        self._cccg_function_design = match_CCCG_list_on(self._cccg_function, 'Design_')

        self._cccg_input_function  = match_CCCG_list_on(self._cccg_input, 'Function_')
        self._cccg_input_design = match_CCCG_list_on(self._cccg_input, 'Design_')

        self._cccg_design_input  = match_CCCG_list_on(self._cccg_design, 'Input_')
        self._cccg_design_function = match_CCCG_list_on(self._cccg_design, 'Function_')

        # Merge all CCCGs on two coupling factor and drop duplicates
        self._cccg_double   = merge_multi_lists(self._cccg_function_design,
                                        self._cccg_function_input,
                                        self._cccg_design_input,
                                        self._cccg_design_function,
                                        self._cccg_input_design,
                                        self._cccg_input_function
                                        )
        # remove single entry since there is no other component that has shared coupling factors
        self._cccg_double = [x for x in self._cccg_double if len(x) != 1]

        # Get CCCG on three coupling factor
        self._cccg_function_input_design = match_CCCG_list_on(self._cccg_function_input, 'Design_')
        self._cccg_function_design_input = match_CCCG_list_on(self._cccg_function_design, 'Input_')

        self._cccg_input_design_function = match_CCCG_list_on(self._cccg_input_design, 'Function_')
        self._cccg_input_function_design = match_CCCG_list_on(self._cccg_input_function, 'Design_')

        self._cccg_design_input_function = match_CCCG_list_on(self._cccg_design_input, 'Function_')
        self._cccg_design_function_input = match_CCCG_list_on(self._cccg_design_function, 'Input_')

        # Merge all CCCGs on triple coupling factor and drop duplicates
        self._cccg_triple   = merge_multi_lists(self._cccg_function_input_design,
                                        self._cccg_function_design_input,
                                        self._cccg_input_design_function,
                                        self._cccg_input_function_design,
                                        self._cccg_design_input_function,
                                        self._cccg_design_function_input,
                                        )
        # remove single entry since there is no other component that has shared coupling factors
        self._cccg_triple = [x for x in self._cccg_triple if len(x) != 1]

        # Merge all CCCGs and drop duplicates
        self._cccg_final = merge_multi_lists(self._cccg_single,
                                    self._cccg_double,
                                    self._cccg_triple
        )

        # Find all high dependency groups that have function dependency
        self._function_all = match_multi_list_on(self._cccg_final, 'Function_')
        # Find all high dependency groups that have input dependency
        self._input_all = match_multi_list_on(self._cccg_final, 'Input_')
        # Find all high dependency groups that have input dependency
        self._design_all = match_multi_list_on(self._cccg_final, 'Design_')

        cccg_final = self.aggregate(self._cccg_final)

        file_base = config['output_file_base']
        # output_type = config['output_type']

        if config['final']:
            filename = file_base + '_final.csv'
            self.write(cccg_final, filename)
        if config['single']:
            cccg_single = self.aggregate(self._cccg_single)
            filename = file_base +'_single.csv'
            self.write(cccg_single, filename)
        if config['double']:
            cccg_double = self.aggregate(self._cccg_double)
            filename = file_base + '_double.csv'
            self.write(cccg_double, filename)
        if config['triple']:
            cccg_triple = self.aggregate(self._cccg_triple)
            filename = file_base + '_triple.csv'
            self.write(cccg_triple, filename)

        # if config['function_all']:
        #   cccg_function_all = self.aggregate(self._function_all)
        #   filename = file_base + '_function_all.csv'
        #   self.write(cccg_function_all, filename)
        # if config['input_all']:
        #   cccg_input_all = self.aggregate(self._input_all)
        #   filename = file_base + '_input_all.csv'
        #   self.write(cccg_input_all, filename)
        # if config['design_all']:
        #   cccg_design_all = self.aggregate(self._design_all)
        #   filename = file_base + '_design_all.csv'
        #   self.write(cccg_design_all, filename)



[docs]
    def get(self, name):
        """Get CCCGs

        Args:
            name (str): name for CCCGs group

        Returns:
            list (list of Pandas.DataFrame): list of CCCGs
        """
        if name == 'final':
            return self.aggregate(self._cccg_final)
        elif name == 'single':
            return self.aggregate(self._cccg_single)
        elif name == 'double':
            return self.aggregate(self._cccg_double)
        elif name == 'triple':
            return self.aggregate(self._cccg_triple)
        else:
            raise IOError(f'Unrecognized name "{name}"!')



[docs]
    def write(self, data, fname, style='csv'):
        """Dump data

        Args:
            data (list of pandas.DataFrame): output data to dump
            fname (str): file name to save the data
            style (str, optional): type of file. Defaults to "csv".
        """
        logger.info("Save CCCGs into %s", fname)
        with open(fname, 'w', newline='', encoding='utf-8') as f:
            for df in data:
                # Write each DataFrame to the file
                df.to_csv(f, index=False)
                f.write('\n')




[docs]
    def aggregate(self, cccgs):
        """Aggregate expanded Function, Input, and Design into Function_Config, Input_Config, and Design_Config

        Args:
            cccgs (list): list of pd.DataFrame (i.e., CCCGs)

        Returns:
            list: list of aggregated CCCGs
        """
        cccg_list = copy.deepcopy(cccgs) # deepcopy is required, otherwise data manipulation
        cccg_transfer = []
        for pd_arr in cccg_list:
            coupling_factor = []
            function_columns = [col for col in pd_arr.columns if col.startswith('Function_')]
            if len(function_columns) > 0:
                pd_arr["Function_Config"] = pd_arr[function_columns].apply(lambda row: ';'.join(sorted(row.values.astype(str))), axis=1)
                pd_arr.drop(columns=function_columns, inplace=True)
                pd_arr["Function_Config"] = pd_arr["Function_Config"].apply(clean_string)
                if len(pd_arr["Function_Config"]) > 1 and pd_arr["Function_Config"].nunique() == 1 and pd_arr["Function_Config"].iloc[0] != '':
                    coupling_factor.append('Function')

            input_columns = [col for col in pd_arr.columns if col.startswith('Input_')]
            if len(input_columns) > 0:
                pd_arr["Input_Config"] = pd_arr[input_columns].apply(lambda row: ';'.join(sorted(row.values.astype(str))), axis=1)
                pd_arr.drop(columns=input_columns, inplace=True)
                pd_arr["Input_Config"] = pd_arr["Input_Config"].apply(clean_string)
                if len(pd_arr["Input_Config"]) > 1 and pd_arr["Input_Config"].nunique() == 1 and pd_arr["Input_Config"].iloc[0] != '':
                    coupling_factor.append('Input')

            design_columns = [col for col in pd_arr.columns if col.startswith('Design_')]
            if len(design_columns) > 0:
                pd_arr["Design_Config"] = pd_arr[design_columns].apply(lambda row: ';'.join(sorted(row.values.astype(str))), axis=1)
                pd_arr.drop(columns=design_columns, inplace=True)
                pd_arr["Design_Config"] = pd_arr["Design_Config"].apply(clean_string)
                if len(pd_arr["Design_Config"]) > 1 and pd_arr["Design_Config"].nunique() == 1 and pd_arr["Design_Config"].iloc[0] != '':
                    coupling_factor.append('Design')
            if len(coupling_factor) > 0:
                pd_arr['Coupling_Factor'] = ";".join(coupling_factor)
            cccg_transfer.append(pd_arr)

        return cccg_transfer