Source code for beetroots.inversion.results.utils.bayes_pval_plots

import os
from typing import List, Optional, Tuple, Union

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib import colors
from scipy.stats import beta

from beetroots.inversion.plots.map_shaper import MapShaper
from beetroots.inversion.results.utils.abstract_util import ResultsUtil
from beetroots.sampler.abstract_sampler import Sampler


[docs] class ResultsBayesPvalues(ResultsUtil): r"""Bayesian model checking accounting for uncertainties on the p-value due to Monte Carlo evaluation. The method is described in :cite:t:`paludProblemesInversesTest2023a`. """ __slots__ = ( "model_name", "path_img", "path_data_csv_out", "N_MCMC", "N", "D", ) CONFIDENCE_THRESHOLD_ALPHA = 0.05 r"""..., denoted :math:`\alpha` in the article""" CONFIDENCE_THRESHOLD_DELTA = 0.1 r"""..., denoted :math:`\delta` in the article""" ESS_OPTIM = Sampler.ESS_OPTIM * 1 r"""number of random reproduced observations to draw to evaluate the model checking p-value for optimization procedures""" def __init__( self, model_name: str, chain_type: str, path_img: str, path_data_csv_out: str, N_MCMC: int, N: int, D_sampling: int, plot_ESS: bool, ): assert chain_type in ["mcmc", "optim_map"] self.model_name = model_name self.chain_type = chain_type self.path_img = path_img self.path_data_csv_out = path_data_csv_out self.N_MCMC = N_MCMC self.N = N self.D_sampling = D_sampling self.plot_ESS = plot_ESS
[docs] def read_data(self) -> Tuple[Optional[pd.DataFrame], pd.DataFrame]: # ess if self.chain_type == "mcmc": if self.plot_ESS: path_file = f"{self.path_data_csv_out}/" path_file += f"estimation_ESS_{self.model_name}.csv" df_ess_model = pd.read_csv(path_file, index_col=["n", "d"]) df_ess_model = df_ess_model.sort_index() df_ess_model = df_ess_model.reset_index(drop=False) else: df_ess_model = None else: list_dicts = [ {"n": n, "d": d, "ess": self.ESS_OPTIM, "seed": "overall"} for n in range(self.N) for d in range(self.D_sampling) ] df_ess_model = pd.DataFrame.from_records(list_dicts) df_ess_model = df_ess_model.set_index(["n", "d"]) df_ess_model = df_ess_model.sort_index().reset_index(drop=False) # estimated pval path_file = f"{self.path_data_csv_out}/" path_file += f"p_value_llh_{self.model_name}.csv" df_pval = pd.read_csv(path_file, index_col=["seed", "n"]) df_pval = df_pval.sort_index().reset_index(drop=False) assert df_ess_model is None or len(df_ess_model) == self.N * self.D_sampling assert len(df_pval) == self.N * self.N_MCMC return df_ess_model, df_pval
[docs] def create_folders(self) -> str: folder_path_inter = f"{self.path_img}/p-values" folder_path = f"{folder_path_inter}/{self.model_name}" for path_ in [folder_path_inter, folder_path]: if not os.path.isdir(path_): os.mkdir(path_) return folder_path
[docs] def main( self, list_idx_sampling: List[int], map_shaper: Optional[MapShaper], ) -> None: if self.N < 1: msg = "this function should only be called when N > 1 " msg += "to avoid 1-pixel maps" raise ValueError(msg) df_ess_model, df_pval = self.read_data() folder_path = self.create_folders() print("starting Bayesian p-value plots") if self.plot_ESS: assert df_ess_model is not None df_ess_overall = df_ess_model[(df_ess_model["seed"] == "overall")] df_ess = df_ess_overall.groupby("n")[["ess"]].min() df_ess = df_ess.sort_index() ess = df_ess["ess"].values list_decisions = [] for seed in range(self.N_MCMC): df_pval_seed = df_pval[df_pval["seed"] == seed] df_pval_seed = df_pval_seed.sort_values("n") pval_estim = df_pval_seed["p-value-llh"].values decision_estim_seed = np.where( pval_estim < self.CONFIDENCE_THRESHOLD_ALPHA, -1, 1, ) if self.plot_ESS: proba_reject = np.zeros((self.N,)) for n in range(self.N): rv = beta( 1 + ess[n] * pval_estim[n], 1 + ess[n] * (1 - pval_estim[n]), ) proba_reject[n] = rv.cdf(self.CONFIDENCE_THRESHOLD_ALPHA) decision_pval_bayes = np.where( proba_reject > 1 - self.CONFIDENCE_THRESHOLD_DELTA, -1, np.where( proba_reject < self.CONFIDENCE_THRESHOLD_DELTA, 1, 0, ), ) else: proba_reject = [None for n in range(self.N)] decision_pval_bayes = [None for n in range(self.N)] list_decisions += [ { "seed": seed, "n": n, "pval_estim": pval_estim[n], "decision_estim_seed": decision_estim_seed[n], "proba_reject": proba_reject[n], "decision_pval_bayes": decision_pval_bayes[n], } for n in range(self.N) ] if map_shaper is not None: pval_estim_seed_shaped = map_shaper.from_vector_to_map(pval_estim) decision_estim_seed_shaped = map_shaper.from_vector_to_map( decision_estim_seed ) if proba_reject[0] is not None: proba_reject_shaped = map_shaper.from_vector_to_map(proba_reject) decision_pval_bayes_shaped = map_shaper.from_vector_to_map( decision_pval_bayes ) plt.figure(figsize=(8, 6)) title = "Estimated p-values" if self.N_MCMC > 1: title += f" for seed={seed}" plt.title(title) plt.imshow( pval_estim_seed_shaped, norm=colors.LogNorm(), cmap="viridis", origin="lower", ) plt.colorbar() # plt.tight_layout() filename = f"{folder_path}/pval_estim_{self.chain_type}" filename += f"_seed{seed}.PNG" plt.savefig(filename, bbox_inches="tight") plt.close() # plt.figure(figsize=(8, 6)) title = "Model rejection from estimated p-values" if self.N_MCMC > 1: title += f" (seed={seed})" plt.title(title) plt.imshow( decision_estim_seed_shaped, cmap=colors.ListedColormap(["black", "white"]), origin="lower", ) colorbar = plt.colorbar(ticks=[-1 / 2, 1 / 2]) colorbar.set_ticklabels( ["reject", "reproduced"], rotation=90, va="center", ) filename = f"{folder_path}/decision_pval_estim_{self.chain_type}" filename += f"_seed{seed}.PNG" plt.savefig(filename, bbox_inches="tight") # plt.tight_layout() plt.close() # * with bayesian computation of pval # Bayesian p-values if self.plot_ESS: plt.figure(figsize=(8, 6)) plt.title( r"Proba. reject per pixel $\mathbb{P}[p_n \leq \alpha]$", ) plt.imshow( proba_reject_shaped, cmap="viridis", origin="lower", ) plt.colorbar() # plt.tight_layout() plt.savefig( f"{folder_path}/proba_reject_{self.chain_type}_seed{seed}.PNG", bbox_inches="tight", ) plt.close() # decision process plt.figure(figsize=(8, 6)) plt.title("Model rejection from p-values with uncertainties") plt.imshow( decision_pval_bayes_shaped, cmap=colors.ListedColormap(["black", "grey", "white"]), origin="lower", norm=colors.Normalize(vmin=-1, vmax=1), ) colorbar = plt.colorbar(ticks=[-2 / 3, 0, 2 / 3]) colorbar.set_ticklabels( ["reject", "unclear", "no reject"], rotation=90, va="center", ) plt.savefig( f"{folder_path}/decision_from_bayes_pval_{self.chain_type}_seed{seed}.PNG", bbox_inches="tight", ) plt.close() df_pval = pd.DataFrame.from_records(list_decisions) df_pval.to_csv(f"{self.path_data_csv_out}/pvalues_analysis.csv") print("Bayesian p-value plots plots done") return