# Source code for bahamas.stage_odc_distribution

# Copyright 2025, Battelle Energy Alliance, LLC  ALL RIGHTS RESERVED

import pandas as pd
from scipy.stats import beta
import logging

from .utils import ODC_types, SDLC_stages

# Module-level logger for this module, registered under the name 'BAHAMAS.ODC'.
logger = logging.getLogger('BAHAMAS.ODC')
def get_stage_odc_dist(excel_file, distribution='beta', sheet_name='ODC'):
    """Get the distribution of each ODC defect type at each SDLC stage.

    The result is indexed as
    ``P(Defect type | SDLC Stage) = dist_dict['SDLC Stage']['Defect type']``.

    Every row of the sheet is read through its ``Stages`` and ``Total``
    columns plus one column per entry of ``ODC_types``. For the ``'beta'``
    distribution, a column value ``val`` in a row whose total is ``total``
    produces ``beta(0.5 + val, 0.5 + total - val)``.

    Args:
        excel_file (str): Filename of the excel file to read in
        distribution (str, optional): Type of distribution to use (defaults
            to "beta"); "beta" is the only supported value
        sheet_name (str, optional): Name of the sheet with the ODC data
            (defaults to "ODC")

    Returns:
        dict: Maps each ``Stages`` value to a dict that maps each ODC type to
        a frozen ``scipy.stats.beta`` distribution. If two rows share the
        same ``Stages`` value, the later row wins.

    Raises:
        IOError: If ``distribution`` is not "beta". The check runs before the
            excel file is opened.
    """
    # Fail fast: reject an unsupported distribution before logging or reading
    # the workbook, so a bad argument is reported the same way for every
    # input (including a sheet with no rows). IOError is kept, rather than
    # the more conventional ValueError, so existing callers' except clauses
    # keep working.
    if distribution != 'beta':
        raise IOError(
            f"Unsupported distribution type {distribution} "
            "for function 'get_stage_odc_dist'"
        )

    logger.info('Construct ODC Conditional Distribution for each SDLC stage')

    # Beta(0.5, 0.5) prior parameters that are added to the sheet values.
    alpha_prior = 0.5
    beta_prior = 0.5

    df = pd.read_excel(excel_file, sheet_name=sheet_name)

    dist_dict = {}
    for _, row in df.iterrows():
        total = row.Total
        stage_dists = {}
        for odc in ODC_types:
            val = getattr(row, odc)
            # Posterior-style update: prior parameters plus the value for
            # this ODC type and the remainder of the row total.
            stage_dists[odc] = beta(alpha_prior + val, beta_prior + total - val)
        dist_dict[row.Stages] = stage_dists
    return dist_dict