import os
from typing import List, Optional, Tuple, Union
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib import colors
from scipy.stats import beta
from beetroots.inversion.plots.map_shaper import MapShaper
from beetroots.inversion.results.utils.abstract_util import ResultsUtil
from beetroots.sampler.abstract_sampler import Sampler
[docs]
class ResultsBayesPvalues(ResultsUtil):
r"""Bayesian model checking accounting for uncertainties on the p-value due to Monte Carlo evaluation.
The method is described in :cite:t:`paludProblemesInversesTest2023a`.
"""
__slots__ = (
"model_name",
"path_img",
"path_data_csv_out",
"N_MCMC",
"N",
"D",
)
CONFIDENCE_THRESHOLD_ALPHA = 0.05
r"""..., denoted :math:`\alpha` in the article"""
CONFIDENCE_THRESHOLD_DELTA = 0.1
r"""..., denoted :math:`\delta` in the article"""
ESS_OPTIM = Sampler.ESS_OPTIM * 1
r"""number of random reproduced observations to draw to evaluate the model checking p-value for optimization procedures"""
def __init__(
self,
model_name: str,
chain_type: str,
path_img: str,
path_data_csv_out: str,
N_MCMC: int,
N: int,
D_sampling: int,
plot_ESS: bool,
):
assert chain_type in ["mcmc", "optim_map"]
self.model_name = model_name
self.chain_type = chain_type
self.path_img = path_img
self.path_data_csv_out = path_data_csv_out
self.N_MCMC = N_MCMC
self.N = N
self.D_sampling = D_sampling
self.plot_ESS = plot_ESS
[docs]
def read_data(self) -> Tuple[Optional[pd.DataFrame], pd.DataFrame]:
# ess
if self.chain_type == "mcmc":
if self.plot_ESS:
path_file = f"{self.path_data_csv_out}/"
path_file += f"estimation_ESS_{self.model_name}.csv"
df_ess_model = pd.read_csv(path_file, index_col=["n", "d"])
df_ess_model = df_ess_model.sort_index()
df_ess_model = df_ess_model.reset_index(drop=False)
else:
df_ess_model = None
else:
list_dicts = [
{"n": n, "d": d, "ess": self.ESS_OPTIM, "seed": "overall"}
for n in range(self.N)
for d in range(self.D_sampling)
]
df_ess_model = pd.DataFrame.from_records(list_dicts)
df_ess_model = df_ess_model.set_index(["n", "d"])
df_ess_model = df_ess_model.sort_index().reset_index(drop=False)
# estimated pval
path_file = f"{self.path_data_csv_out}/"
path_file += f"p_value_llh_{self.model_name}.csv"
df_pval = pd.read_csv(path_file, index_col=["seed", "n"])
df_pval = df_pval.sort_index().reset_index(drop=False)
assert df_ess_model is None or len(df_ess_model) == self.N * self.D_sampling
assert len(df_pval) == self.N * self.N_MCMC
return df_ess_model, df_pval
[docs]
def create_folders(self) -> str:
folder_path_inter = f"{self.path_img}/p-values"
folder_path = f"{folder_path_inter}/{self.model_name}"
for path_ in [folder_path_inter, folder_path]:
if not os.path.isdir(path_):
os.mkdir(path_)
return folder_path
[docs]
def main(
self,
list_idx_sampling: List[int],
map_shaper: Optional[MapShaper],
) -> None:
if self.N < 1:
msg = "this function should only be called when N > 1 "
msg += "to avoid 1-pixel maps"
raise ValueError(msg)
df_ess_model, df_pval = self.read_data()
folder_path = self.create_folders()
print("starting Bayesian p-value plots")
if self.plot_ESS:
assert df_ess_model is not None
df_ess_overall = df_ess_model[(df_ess_model["seed"] == "overall")]
df_ess = df_ess_overall.groupby("n")[["ess"]].min()
df_ess = df_ess.sort_index()
ess = df_ess["ess"].values
list_decisions = []
for seed in range(self.N_MCMC):
df_pval_seed = df_pval[df_pval["seed"] == seed]
df_pval_seed = df_pval_seed.sort_values("n")
pval_estim = df_pval_seed["p-value-llh"].values
decision_estim_seed = np.where(
pval_estim < self.CONFIDENCE_THRESHOLD_ALPHA,
-1,
1,
)
if self.plot_ESS:
proba_reject = np.zeros((self.N,))
for n in range(self.N):
rv = beta(
1 + ess[n] * pval_estim[n],
1 + ess[n] * (1 - pval_estim[n]),
)
proba_reject[n] = rv.cdf(self.CONFIDENCE_THRESHOLD_ALPHA)
decision_pval_bayes = np.where(
proba_reject > 1 - self.CONFIDENCE_THRESHOLD_DELTA,
-1,
np.where(
proba_reject < self.CONFIDENCE_THRESHOLD_DELTA,
1,
0,
),
)
else:
proba_reject = [None for n in range(self.N)]
decision_pval_bayes = [None for n in range(self.N)]
list_decisions += [
{
"seed": seed,
"n": n,
"pval_estim": pval_estim[n],
"decision_estim_seed": decision_estim_seed[n],
"proba_reject": proba_reject[n],
"decision_pval_bayes": decision_pval_bayes[n],
}
for n in range(self.N)
]
if map_shaper is not None:
pval_estim_seed_shaped = map_shaper.from_vector_to_map(pval_estim)
decision_estim_seed_shaped = map_shaper.from_vector_to_map(
decision_estim_seed
)
if proba_reject[0] is not None:
proba_reject_shaped = map_shaper.from_vector_to_map(proba_reject)
decision_pval_bayes_shaped = map_shaper.from_vector_to_map(
decision_pval_bayes
)
plt.figure(figsize=(8, 6))
title = "Estimated p-values"
if self.N_MCMC > 1:
title += f" for seed={seed}"
plt.title(title)
plt.imshow(
pval_estim_seed_shaped,
norm=colors.LogNorm(),
cmap="viridis",
origin="lower",
)
plt.colorbar()
# plt.tight_layout()
filename = f"{folder_path}/pval_estim_{self.chain_type}"
filename += f"_seed{seed}.PNG"
plt.savefig(filename, bbox_inches="tight")
plt.close()
#
plt.figure(figsize=(8, 6))
title = "Model rejection from estimated p-values"
if self.N_MCMC > 1:
title += f" (seed={seed})"
plt.title(title)
plt.imshow(
decision_estim_seed_shaped,
cmap=colors.ListedColormap(["black", "white"]),
origin="lower",
)
colorbar = plt.colorbar(ticks=[-1 / 2, 1 / 2])
colorbar.set_ticklabels(
["reject", "reproduced"],
rotation=90,
va="center",
)
filename = f"{folder_path}/decision_pval_estim_{self.chain_type}"
filename += f"_seed{seed}.PNG"
plt.savefig(filename, bbox_inches="tight")
# plt.tight_layout()
plt.close()
# * with bayesian computation of pval
# Bayesian p-values
if self.plot_ESS:
plt.figure(figsize=(8, 6))
plt.title(
r"Proba. reject per pixel $\mathbb{P}[p_n \leq \alpha]$",
)
plt.imshow(
proba_reject_shaped,
cmap="viridis",
origin="lower",
)
plt.colorbar()
# plt.tight_layout()
plt.savefig(
f"{folder_path}/proba_reject_{self.chain_type}_seed{seed}.PNG",
bbox_inches="tight",
)
plt.close()
# decision process
plt.figure(figsize=(8, 6))
plt.title("Model rejection from p-values with uncertainties")
plt.imshow(
decision_pval_bayes_shaped,
cmap=colors.ListedColormap(["black", "grey", "white"]),
origin="lower",
norm=colors.Normalize(vmin=-1, vmax=1),
)
colorbar = plt.colorbar(ticks=[-2 / 3, 0, 2 / 3])
colorbar.set_ticklabels(
["reject", "unclear", "no reject"],
rotation=90,
va="center",
)
plt.savefig(
f"{folder_path}/decision_from_bayes_pval_{self.chain_type}_seed{seed}.PNG",
bbox_inches="tight",
)
plt.close()
df_pval = pd.DataFrame.from_records(list_decisions)
df_pval.to_csv(f"{self.path_data_csv_out}/pvalues_analysis.csv")
print("Bayesian p-value plots plots done")
return