Source code for beetroots.inversion.results.utils.mc_utils.histograms

from typing import Dict, Optional, Union

import matplotlib.pyplot as plt
import numpy as np
from matplotlib import colors
from scipy import interpolate


[docs] def plot_1D_hist( list_Theta_lin_seed: np.ndarray, n: Optional[int], d: int, folder_path: str, title: str, lower_bounds_lin: Optional[np.ndarray], upper_bounds_lin: Optional[np.ndarray], seed: Optional[int] = None, estimator: Optional[float] = None, IC_low: Optional[float] = None, IC_high: Optional[float] = None, true_val: Optional[float] = None, ): if len(list_Theta_lin_seed.shape) == 3: list_Theta_lin_nd = list_Theta_lin_seed[:, n, d] * 1 else: list_Theta_lin_nd = list_Theta_lin_seed * 1 if lower_bounds_lin is None: lower_bounds_lin = np.min(list_Theta_lin_seed) * np.ones((10,)) if upper_bounds_lin is None: upper_bounds_lin = np.max(list_Theta_lin_seed) * np.ones((10,)) plt.figure(figsize=(8, 6)) plt.title(title) assert lower_bounds_lin is not None if list_Theta_lin_seed.min() > 0 and lower_bounds_lin.min() > 0: plt.hist(np.log10(list_Theta_lin_nd), bins=100, label="samples") title += " (in log10 scale)" else: plt.hist(list_Theta_lin_nd, bins=100, label="samples") if estimator is not None: if list_Theta_lin_seed.min() > 0 and lower_bounds_lin.min() > 0: plt.axvline(np.log10(estimator), c="orange", ls="-", label="mean") else: plt.axvline(estimator, c="orange", ls="-", label="mean") if np.all([IC_low is not None, IC_high is not None]): assert IC_low is not None and IC_high is not None if list_Theta_lin_seed.min() > 0 and lower_bounds_lin.min() > 0: plt.axvline(np.log10(IC_low), c="k", ls="--", label="CI") plt.axvline(np.log10(IC_high), c="k", ls="--") else: plt.axvline(IC_low, c="k", ls="--", label="CI") plt.axvline(IC_high, c="k", ls="--") if true_val is not None: if list_Theta_lin_seed.min() > 0 and lower_bounds_lin.min() > 0: plt.axvline(np.log10(true_val), c="red", ls="--", label="true") else: plt.axvline(true_val, c="red", ls="--", label="true") plt.grid() plt.legend() # plt.tight_layout() str_n = f"_n{n}" if n is not None else "" if seed is not None: plt.savefig(f"{folder_path}/hist{str_n}_d{d}_seed{seed}.PNG") else: plt.savefig(f"{folder_path}/hist{str_n}_d{d}_overall.PNG") plt.close() return
[docs] def plot_1D_chain( list_Theta_lin_nd: np.ndarray, n: Optional[int], d: int, folder_path: str, title: str, lower_bounds_lin: Optional[np.ndarray], upper_bounds_lin: Optional[np.ndarray], N_MCMC: int, T_MC: int, T_BI: int, true_val: Optional[float] = None, is_u: bool = False, ) -> None: assert len(list_Theta_lin_nd.shape) == 1 # (N_MCMC * (T_MC - T_BI),) if lower_bounds_lin is None: lower_bounds_lin = np.min(list_Theta_lin_nd) * np.ones((10,)) if upper_bounds_lin is None: upper_bounds_lin = np.max(list_Theta_lin_nd) * np.ones((10,)) assert lower_bounds_lin is not None plt.figure(figsize=(8, 6)) plt.title(title) plt.plot(list_Theta_lin_nd, label="MC chain") for seed in range(1, N_MCMC): if seed == 1: plt.axvline( seed * (T_MC - T_BI), c="k", ls="-", label="new MC", ) else: plt.axvline(seed * (T_MC - T_BI) - seed, c="k", ls="-") if true_val is not None: plt.axhline(true_val, c="r", ls="--", label="true value") if list_Theta_lin_nd.min() > 0 and lower_bounds_lin.min() > 0: plt.yscale("log") plt.grid() plt.legend() # plt.tight_layout() plt.xlabel("Iteration") str_n = f"_n{n}" if n is not None else "" if is_u: filename = f"{folder_path}/mc_1D{str_n}_ell{d}.PNG" else: filename = f"{folder_path}/mc_1D{str_n}_d{d}.PNG" plt.savefig(filename, bbox_inches="tight") plt.close()
[docs] def plot_2D_chain( list_Theta_lin_seed: np.ndarray, n: int, d1: int, d2: int, model_name: str, seed: int, folder_path: str, list_names: list, Theta_estimator: np.ndarray, Theta_true: Optional[np.ndarray] = None, ): # assert list_Theta_lin_seed.shape in [ # (self.T_MC - self.T_BI, self.N, self.D), # (self.N_MCMC * (self.T_MC - self.T_BI), self.N, self.D), # ] freq = 5 plt.figure(figsize=(8, 6)) title = f"Joint MC of ({list_names[d1]}, {list_names[d2]}) for pixel n={n}" plt.title(title) if list_Theta_lin_seed.min() > 0: plt.scatter( np.log10(list_Theta_lin_seed[::freq, n, d1]).flatten(), np.log10(list_Theta_lin_seed[::freq, n, d2]).flatten(), c=np.arange(list_Theta_lin_seed[::freq].shape[0]), s=10, label="samples", ) else: plt.scatter( list_Theta_lin_seed[::freq, n, d1].flatten(), list_Theta_lin_seed[::freq, n, d2].flatten(), c=np.arange(list_Theta_lin_seed[::freq].shape[0]), s=10, label="samples", ) if Theta_true is not None: plt.plot( np.log10([Theta_true[n, d1]]), np.log10([Theta_true[n, d2]]), "r+", ms=20, label="truth", ) plt.colorbar() plt.legend() plt.grid() if list_Theta_lin_seed.min() > 0: plt.xlabel(r"$\log_{10}$ " + list_names[d1]) plt.ylabel(r"$\log_{10}$ " + list_names[d2]) else: plt.xlabel(list_names[d1]) plt.ylabel(list_names[d2]) # plt.tight_layout() plt.savefig(f"{folder_path}/mc_2D_n{n}_d1{d1}_d2{d2}_seed{seed}_chain.PNG") plt.close()
[docs] def plot_2D_hist( list_Theta_lin_seed: np.ndarray, n: Optional[int], d1: int, d2: int, model_name: str, folder_path: str, list_names: list, lower_bounds_lin: np.ndarray, upper_bounds_lin: np.ndarray, Theta_MMSE: np.ndarray, true_val: Optional[np.ndarray] = None, seed: Union[int, str] = "overall", point_challenger: Dict = {}, ): # assert list_Theta_lin_seed.shape in [ # (self.T_MC - self.T_BI, self.N, self.D), # (self.N_MCMC * (self.T_MC - self.T_BI), self.N, self.D), # ] def _set_edges(): list_Theta_lin_seed_used = list_Theta_lin_seed * 1 # x-axis if list_Theta_lin_seed[:, 0].min() > 0 and lower_bounds_lin[d1] > 0: bounds_theta = ( np.log10(lower_bounds_lin[d1]) - 0.1, np.log10(upper_bounds_lin[d1]) + 0.1, ) list_Theta_lin_seed_used[:, 0] = np.log10(list_Theta_lin_seed_used[:, 0]) is_theta_log = True else: bounds_theta = ( lower_bounds_lin[d1] - 0.1, upper_bounds_lin[d1] + 0.1, ) is_theta_log = False # y-axis if list_Theta_lin_seed[:, 1].min() > 0 and lower_bounds_lin[d2] > 0: bounds_y = ( np.log10(lower_bounds_lin[d2]) - 0.1, np.log10(upper_bounds_lin[d2]) + 0.1, ) list_Theta_lin_seed_used[:, 1] = np.log10(list_Theta_lin_seed_used[:, 1]) is_y_log = True else: bounds_y = ( lower_bounds_lin[d2] - 0.1, upper_bounds_lin[d2] + 0.1, ) is_y_log = False return list_Theta_lin_seed_used, bounds_theta, bounds_y, is_theta_log, is_y_log ( list_Theta_lin_seed_used, bounds_theta, bounds_y, is_theta_log, is_y_log, ) = _set_edges() plt.figure(figsize=(8, 6)) title = f"Sample histogram of ({list_names[d1]}, {list_names[d2]})" if n is not None: title += f" of pixel n={n}" plt.title(title) plt.hist2d( list_Theta_lin_seed_used[:, 0].flatten(), list_Theta_lin_seed_used[:, 1].flatten(), (100, 100), range=[[bounds_theta[0], bounds_theta[1]], [bounds_y[0], bounds_y[1]]], norm=colors.LogNorm(), ) plt.plot( np.log10([Theta_MMSE[0]]) if is_theta_log else [Theta_MMSE[0]], np.log10([Theta_MMSE[1]]) if is_y_log else [Theta_MMSE[1]], "rx", ms=20, label="MMSE", markeredgewidth=3, ) if len(point_challenger) > 0: x_chal = point_challenger["value"] * 1 plt.plot( np.log10([x_chal[d1]]) if is_theta_log else [x_chal[d1]], np.log10([x_chal[d2]]) if is_y_log else [x_chal[d2]], "k+", ms=20, label=point_challenger["name"], markeredgewidth=3, ) if true_val is not None: plt.plot( np.log10([true_val[0]]) if is_theta_log else [true_val[0]], np.log10([true_val[1]]) if is_y_log else [true_val[1]], "r+", ms=20, label="truth", markeredgewidth=3, ) plt.legend(loc="best") # in case no point in acceptable set plt.colorbar() plt.grid() plt.xlabel(r"$\log_{10}$ " + list_names[d1] if is_theta_log else list_names[d1]) plt.ylabel(r"$\log_{10}$ " + list_names[d2] if is_y_log else list_names[d2]) # plt.tight_layout() str_n = f"_n{n}" if n is not None else "" if seed == "overall": filename = f"{folder_path}/hist2D{str_n}_d1{d1}_d2{d2}_overall_chain.PNG" else: filename = f"{folder_path}/hist2D{str_n}_d1{d1}_d2{d2}_seed{seed}" filename += "_chain.PNG" plt.savefig(filename) plt.close()
[docs] def plot_2D_proba_contours( list_Theta_lin_seed: np.ndarray, n: Optional[int], d1: int, d2: int, model_name: str, folder_path: str, list_names: list, lower_bounds_lin: np.ndarray, upper_bounds_lin: np.ndarray, Theta_MMSE: np.ndarray, true_val: Optional[np.ndarray] = None, seed: Union[int, str] = "overall", point_challenger: Dict = {}, ): """plots the 2D contours of the 68% and 95% high probability regions inspired from https://stackoverflow.com/questions/37890550/python-plotting-percentile-contour-lines-of-a-probability-distribution """ # assert list_Theta_lin_seed.shape in [ # (self.T_MC - self.T_BI, self.N, self.D), # (self.N_MCMC * (self.T_MC - self.T_BI), self.N, self.D), # ] def _set_edges_and_hist(n_per_axis: int): list_Theta_lin_seed_used = list_Theta_lin_seed * 1 # x-axis if list_Theta_lin_seed[:, 0].min() > 0 and lower_bounds_lin[d1] > 0: x_edges = np.linspace( np.log10(lower_bounds_lin[d1]) - 0.1, np.log10(upper_bounds_lin[d1]) + 0.1, n_per_axis, ) list_Theta_lin_seed_used[:, 0] = np.log10(list_Theta_lin_seed_used[:, 0]) is_theta_log = True else: x_edges = np.linspace( lower_bounds_lin[d1] - 0.1, upper_bounds_lin[d1] + 0.1, n_per_axis, ) is_theta_log = False # y-axis if list_Theta_lin_seed[:, 1].min() > 0 and lower_bounds_lin[d2] > 0: y_edges = np.linspace( np.log10(lower_bounds_lin[d2]) - 0.1, np.log10(upper_bounds_lin[d2]) + 0.1, n_per_axis, ) list_Theta_lin_seed_used[:, 1] = np.log10(list_Theta_lin_seed_used[:, 1]) is_y_log = True else: y_edges = np.linspace( lower_bounds_lin[d2] - 0.1, upper_bounds_lin[d2] + 0.1, n_per_axis, ) is_y_log = False H, x_edges, y_edges = np.histogram2d( list_Theta_lin_seed_used[:, 0].flatten(), list_Theta_lin_seed_used[:, 1].flatten(), bins=(x_edges, y_edges), ) return H, x_edges, y_edges, is_theta_log, is_y_log n_per_axis = 100 #! Contour levels must be increasing, ie percentiles must be decreasing percentiles_arr = np.array([0.95, 0.68]) H, x_edges, y_edges, is_theta_log, is_y_log = _set_edges_and_hist(n_per_axis) H /= H.sum() t = np.linspace(0, H.max(), 1_000) integral = ((H >= t[:, None, None]) * H).sum(axis=(1, 2)) # an error comes if the histogram grid is not fine enough to compute # the contours # Hence we progressively increase the grid size per axis until the grid # gets fine enough. # this error occurs frequently when the Markov chain size is small and # when the high proba region is very small compared to the acceptable # solutions set. while (integral[-1] > percentiles_arr[-1]) and (n_per_axis < 5e2): n_per_axis *= 2 # refine histogram grid H, x_edges, y_edges, _, _ = _set_edges_and_hist(n_per_axis) H /= H.sum() t = np.linspace(0, H.max(), 1_000) integral = ((H >= t[:, None, None]) * H).sum(axis=(1, 2)) f = interpolate.interp1d(integral, t) if integral[-1] > percentiles_arr[-1]: percentiles_arr = percentiles_arr[integral[-1] < percentiles_arr] msg = "Issue with proba contour plot for (n, d1, d2) = " msg += f"({n if n is not None else 0}, {d1}, {d2})" print(msg) if percentiles_arr.size == 0: return # define contours t_contours = f(percentiles_arr) plt.figure(figsize=(8, 6)) title = f"High Probability Region of ({list_names[d1]}, {list_names[d2]})" if n is not None: title += f" of pixel n={n}" plt.title(title) # for percentile, t_contour in zip(percentiles_arr, t_contours): plt.contour( H.T, t_contours, extent=[x_edges[0], x_edges[-1], y_edges[0], y_edges[-1]], # label=f"{100 * percentile} %", ) if list_Theta_lin_seed.min() > 0 and lower_bounds_lin.min() > 0: plt.plot( np.log10([Theta_MMSE[0]]), np.log10([Theta_MMSE[1]]), "rx", ms=20, label="MMSE", markeredgewidth=3, ) else: plt.plot( [Theta_MMSE[0]], [Theta_MMSE[1]], "rx", ms=20, label="MMSE", markeredgewidth=3, ) if len(point_challenger) > 0: x_challenger = point_challenger["value"] if list_Theta_lin_seed.min() > 0 and lower_bounds_lin.min() > 0: plt.plot( np.log10([x_challenger[d1]]), np.log10([x_challenger[d2]]), "k+", ms=20, label=point_challenger["name"], markeredgewidth=3, ) else: plt.plot( [x_challenger[d1]], [x_challenger[d2]], "k+", ms=20, label=point_challenger["name"], markeredgewidth=3, ) if true_val is not None: if list_Theta_lin_seed.min() > 0 and lower_bounds_lin.min() > 0: plt.plot( np.log10([true_val[0]]), np.log10([true_val[1]]), "r+", ms=20, label="truth", markeredgewidth=3, ) else: plt.plot( [true_val[0]], [true_val[1]], "r+", ms=20, label="truth", markeredgewidth=3, ) plt.legend(loc="best") # in case no point in acceptable set # plt.colorbar() plt.grid() plt.xlabel(r"$\log_{10}$ " + list_names[d1] if is_theta_log else list_names[d1]) plt.ylabel(r"$\log_{10}$ " + list_names[d2] if is_y_log else list_names[d2]) # plt.tight_layout() str_n = f"_n{n}" if n is not None else "" if seed == "overall": filename = f"{folder_path}/HPR_2D{str_n}_d1{d1}_d2{d2}_overall_chain.PNG" else: filename = f"{folder_path}/HPR_2D{str_n}_d1{d1}_d2{d2}_seed{seed}" filename += "_chain.PNG" plt.savefig(filename) plt.close() return