# Source code for beetroots.simulations.astro.observation.abstract_real_data

import warnings
from typing import Dict, List, Tuple

import numpy as np
import pandas as pd

from beetroots.inversion.plots.map_shaper import MapShaper
from beetroots.simulations.astro.observation.abstract_observation import (
    SimulationObservation,
)


class SimulationRealData(SimulationObservation):
    r"""abstract class that reads the observation data for real observations"""

    @staticmethod
    def _read_table(path: str) -> pd.DataFrame:
        r"""loads one observation table from a ``.pkl`` or ``.csv`` file

        The caller is responsible for checking the file extension: any path
        that does not end with ``.pkl`` is read as a csv file.

        Parameters
        ----------
        path : str
            path to the ``.pkl`` or ``.csv`` file to read

        Returns
        -------
        pd.DataFrame
            content of the file. For csv files that contain both an ``X`` and
            a ``Y`` column, these two columns are used as the index.
        """
        if path.endswith(".pkl"):
            # NOTE: unpickling can execute arbitrary code, only load files
            # that come from a trusted source.
            return pd.read_pickle(path)

        df = pd.read_csv(path)
        # ``read_csv`` returns a default RangeIndex: without restoring the
        # (X, Y) index here, the index check performed in
        # ``setup_observation`` could never succeed for csv inputs.
        if {"X", "Y"}.issubset(df.columns):
            df = df.set_index(["X", "Y"])
        return df

    @staticmethod
    def _discard_absurd_values(
        df: pd.DataFrame,
        columns: List[str],
        upper_bound: float = 1e3,
    ) -> None:
        r"""replaces in place the entries that are not ``<= upper_bound`` with ``np.inf``

        NaN entries fail the comparison and are therefore also replaced
        with ``np.inf``.

        Parameters
        ----------
        df : pd.DataFrame
            table to modify in place
        columns : List[str]
            columns of ``df`` to process
        upper_bound : float, optional
            largest value that is kept unchanged, by default 1e3
        """
        for col in columns:
            # ``where`` keeps the entries for which the condition holds
            df[col] = df[col].where(df[col] <= upper_bound, np.inf)

    def setup_observation(
        self,
        data_int_path: str,
        data_err_path: str,
        save_obs: bool = True,
    ) -> Tuple[
        pd.DataFrame,
        np.ndarray,
        np.ndarray,
        np.ndarray,
        np.ndarray,
        np.ndarray,
        np.ndarray,
    ]:
        r"""reads the observation data for real observations

        Besides returning the arrays listed below, this method sets the
        attributes ``list_lines_valid``, ``N``, ``Theta_true_scaled`` and
        ``map_shaper``. It reads ``self.list_lines_fit`` and, when
        ``save_obs`` is True, ``self.path_data_csv_in``.

        Parameters
        ----------
        data_int_path : str
            path to the ``.pkl`` or ``.csv`` file that contains the observation maps
        data_err_path : str
            path to the ``.pkl`` or ``.csv`` file that contains the maps of additive noise standard deviation
        save_obs : bool, optional
            whether to write the observations, additive noise standard deviations and censor thresholds used for inference as ``.pkl`` files in ``self.path_data_csv_in``, by default True

        Returns
        -------
        df_int_fit : pd.DataFrame
            DataFrame containing the observations used for inference
        y_fit : np.ndarray of shape (N, L)
            observations to be used for inference
        sigma_a_fit : np.ndarray of shape (N, L)
            additive noise standard deviations associated with the observations used for inference
        omega_fit : np.ndarray of shape (N, L)
            censor threshold associated with the observations used for inference
        y_valid : np.ndarray of shape (N, L_valid)
            observations that are not to be used for inference
        sigma_a_valid : np.ndarray of shape (N, L_valid)
            additive noise standard deviations associated with the observations not used for inference
        omega_valid : np.ndarray of shape (N, L_valid)
            censor threshold associated with the observations not used for inference

        Raises
        ------
        AssertionError
            if a file extension is neither ``.pkl`` nor ``.csv``, if the two tables are not indexed by ``("X", "Y")``, if they do not share the same shape and columns, or if a line of ``self.list_lines_fit`` is missing from the tables.

        Notes
        -----
        Entries that are NaN or larger than 1e3 are set to ``np.inf`` before the conversion to arrays. ``np.nan_to_num`` then maps them to the largest finite float, so the ``nan=...`` replacement values look unreachable for the selected lines (NOTE(review): confirm that this is the intended behavior).
        """
        # * read observations
        # intensities
        assert data_int_path.endswith(".pkl") or data_int_path.endswith(
            ".csv"
        ), f"The intensity file must be either a csv or a pickle (extension pkl) file. Current: {data_int_path}"
        df_int = self._read_table(data_int_path)

        # standard deviations of additive noise
        assert data_err_path.endswith(".pkl") or data_err_path.endswith(
            ".csv"
        ), f"The standard deviation file must be either a csv or a pickle (extension `.pkl`) file. Current: {data_err_path}"
        df_err = self._read_table(data_err_path)

        if data_int_path.endswith(".csv") or data_err_path.endswith(".csv"):
            warnings.warn(
                "For faster loading, consider using the `.pkl` format instead `.csv`."
            )

        # * check that the two tables describe the same pixels and lines
        assert list(df_int.index.names) == [
            "X",
            "Y",
        ], f"The intensity table must be indexed by (X, Y). Current: {list(df_int.index.names)}"
        assert list(df_err.index.names) == [
            "X",
            "Y",
        ], f"The standard deviation table must be indexed by (X, Y). Current: {list(df_err.index.names)}"
        assert len(df_int) == len(
            df_err
        ), f"The two tables must have the same number of rows. Current: {len(df_int)} and {len(df_err)}"

        # sort so that the rows of the two tables are in the same pixel order
        df_int = df_int.sort_index()
        df_err = df_err.sort_index()

        if "idx" not in list(df_err.columns):
            df_int["idx"] = np.arange(len(df_int))
            df_err["idx"] = np.arange(len(df_err))

        assert (
            df_int.shape == df_err.shape
        ), f"The two tables must have the same shape. Current: {df_int.shape} and {df_err.shape}"
        assert list(df_int.columns) == list(
            df_err.columns
        ), "The two tables must have the same columns, in the same order."

        for line in self.list_lines_fit:
            assert line in list(
                df_int.columns
            ), f"Line {line} is missing from the observation tables."

        # * split the lines used for inference from the other observed lines
        self.list_lines_valid = sorted(
            set(df_int.columns) - set(self.list_lines_fit) - {"idx"}
        )
        r"""List[str]: list of observed lines not used for inference, that can be used for validation"""

        # explicit copies: the selections are modified below and this must
        # neither affect ``df_int`` / ``df_err`` nor rely on pandas'
        # implicit copy semantics
        cols_fit = self.list_lines_fit + ["idx"]
        cols_valid = self.list_lines_valid + ["idx"]

        df_int_fit = df_int.loc[:, cols_fit].copy()
        df_err_fit = df_err.loc[:, cols_fit].copy()

        df_int_valid = df_int.loc[:, cols_valid].copy()
        df_err_valid = df_err.loc[:, cols_valid].copy()

        # eliminate absurd values
        self._discard_absurd_values(df_int_fit, self.list_lines_fit)
        self._discard_absurd_values(df_err_fit, self.list_lines_fit)
        self._discard_absurd_values(df_int_valid, self.list_lines_valid)
        self._discard_absurd_values(df_err_valid, self.list_lines_valid)

        self.N = len(df_int_fit)
        r"""int: number of pixels / components in the observation"""

        # * correct values (censoring / errors / etc.)
        # The censor threshold is a constant 1e-60, i.e., no censorship.
        # "idx" is the last column, hence the ``[:, :-1]`` selection.
        # For potential censorship, multiply the standard deviations by 3
        # instead of overwriting them.
        df_censor_fit = df_err_fit.copy()
        df_censor_fit.iloc[:, :-1] = 1e-60  # no censorship

        df_censor_valid = df_err_valid.copy()
        df_censor_valid.iloc[:, :-1] = 1e-60  # no censorship

        # * convert to arrays (without the "idx" column)
        y_fit = np.nan_to_num(df_int_fit.drop(columns="idx").values, nan=1e-15)
        sigma_a_fit = np.nan_to_num(df_err_fit.drop(columns="idx").values, nan=1e3)
        omega_fit = df_censor_fit.drop(columns="idx").values

        y_valid = np.nan_to_num(df_int_valid.drop(columns="idx").values, nan=1e-15)
        sigma_a_valid = np.nan_to_num(
            df_err_valid.drop(columns="idx").values,
            nan=1e3,
        )
        omega_valid = df_censor_valid.drop(columns="idx").values

        self.Theta_true_scaled = None
        r"""Optional[np.ndarray]: true values of the physical parameters. Always ``None`` for real applications."""

        self.map_shaper = MapShaper(df_int)
        r"""MapShaper: defines the transformation from vectors to 2D maps"""

        # * save observation
        if save_obs:
            df_int_fit.to_pickle(
                f"{self.path_data_csv_in}/observation_maps.pkl",
            )
            df_err_fit.to_pickle(f"{self.path_data_csv_in}/additive_std.pkl")
            df_censor_fit.to_pickle(
                f"{self.path_data_csv_in}/censor_threshold.pkl",
            )

        return (
            df_int_fit,
            y_fit,
            sigma_a_fit,
            omega_fit,
            y_valid,
            sigma_a_valid,
            omega_valid,
        )