diff --git a/icecube_data_reader/event_types.py b/icecube_data_reader/event_types.py index d4f9774..1a24c3d 100644 --- a/icecube_data_reader/event_types.py +++ b/icecube_data_reader/event_types.py @@ -86,28 +86,6 @@ class Refrigerator: detectors = [IC40, IC59, IC79, IC86, IC86_I, IC86_II] - ''' - @classmethod - def python2dm(cls, python): - """Returns EventType corresponding to python event-type string""" - - for dm in cls.detectors: - if dm.P == python: - return dm - else: - raise ValueError(f"No detector {python} available.") - ''' - ''' - @classmethod - def stan2dm(cls, stan): - """Returns EventType corresponding to stan event-type""" - - for dm in cls.detectors: - if dm.S == stan: - return dm - else: - raise ValueError(f"No detector {stan} available.") - ''' @classmethod def int2str(cls, int_): """Returns python event-type string corresponding to integer event-type""" @@ -136,4 +114,8 @@ def int2dm(cls, int_): return dm else: raise ValueError(f"No detector {int_} available.") + + @classmethod + def str2dm(cls, str_): + return cls.int2dm(cls.str2int(str_)) diff --git a/icecube_data_reader/events.py b/icecube_data_reader/events.py index 77854c6..cb08c4f 100644 --- a/icecube_data_reader/events.py +++ b/icecube_data_reader/events.py @@ -14,12 +14,26 @@ import h5py from time import time as thyme -from icecube_data_reader.downloader import data_directory, I3_14, available_datasets, IceCubeData -from icecube_data_reader.event_types import IC40, IC59, IC79, IC86, suffixes, EventType, Refrigerator +from icecube_data_reader.downloader import ( + data_directory, + I3_14, + available_datasets, + IceCubeData, +) +from icecube_data_reader.event_types import ( + IC40, + IC59, + IC79, + IC86, + suffixes, + EventType, + Refrigerator, +) from icecube_data_reader.lifetime import LifeTime from typing import Self import logging + logger = logging.getLogger(__name__) logger.setLevel(logging.DEBUG) @@ -34,16 +48,16 @@ def mjd(self): return self._mjd @property - def energy(self): - return self._energy + def energies(self): + return self._energies @property - def ang_err(self): - return self._ang_err + def ang_errs(self): + return self._ang_errs @property - def coord(self): - return self._coord + def coords(self): + return self._coords @property def ra(self): @@ -52,23 +66,22 @@ def ra(self): @property def dec(self): return self._dec - - @property - def unit_vector(self): - return self._unit_vector @property - def event_type(self): - return self._event_type - + def unit_vectors(self): + return self._unit_vectors + @property - def int_event_type(self): - return self._int_event_type + def types(self): + return self._types + @property + def int_types(self): + return self._int_types @property def N(self): - return self.event_type.size + return self.types.size @u.quantity_input def apply_energy_cut(self, Emin: u.GeV, Emax: u.GeV = np.inf * u.GeV): @@ -78,7 +91,7 @@ def apply_energy_cut(self, Emin: u.GeV, Emax: u.GeV = np.inf * u.GeV): :type Emin: u.GeV :param Emax: Maximum energy, defaults to np.inf*u.GeV :type Emax: u.GeV, optional - """ + """ pass @classmethod @@ -90,6 +103,10 @@ def from_file(): def to_file(): pass + @abstractmethod + def remove(self, i) -> None: + pass + class IceTrackDR2Events(Events): """ @@ -97,29 +114,31 @@ class IceTrackDR2Events(Events): """ mjd_ = 3 - energy_ = 4 - ang_err_ = 5 - ra_ = 6 - dec_ = 7 + energies_ = 4 + ang_errs_ = 5 + ras_ = 6 + decs_ = 7 @u.quantity_input def __init__( self, - energy: u.GeV, - coord: SkyCoord, - event_type: np.ndarray, - ang_err: u.deg, + energies: u.GeV, + coords: SkyCoord, + types: np.ndarray, + ang_errs: u.deg, mjd: Time, ): - self._energy = energy - self._coord = coord - self._event_type = event_type - self._int_event_type = np.array([_.S for _ in event_type]) - self._ang_err = ang_err + self._energies = energies + self._coords = coords + self._types = types + self._int_types = np.array([_.S for _ in types]) + self._ang_errs = ang_errs self._mjd = mjd - self._coord.representation_type = "cartesian" - self._unit_vector = np.array([coord.x.value, coord.y.value, coord.z.value]).T - self._coord.representation_type = "spherical" + self._coords.representation_type = "cartesian" + self._unit_vectors = np.array( + [coords.x.value, coords.y.value, coords.z.value] + ).T + self._coords.representation_type = "spherical" def scramble_ra(self, seed: int = 42) -> None: """ @@ -139,7 +158,7 @@ def scramble_ra(self, seed: int = 42) -> None: def scramble_mjd(self, lifetime: LifeTime, seed: int = 42) -> None: pass - def select(self, mask: npt.NDArray[np.bool_]): + def select(self, mask: npt.NDArray[np.bool_]) -> None: """ Select some subset of existing events by providing a mask. :param mask: Array of bools with same length as event properties. @@ -155,21 +174,34 @@ def select(self, mask: npt.NDArray[np.bool_]): except AttributeError: pass - self._energy = self._energy[mask] - self._coord = self._coord[mask] - self._unit_vector = self._unit_vector[mask] - self._event_type = self._event_type[mask] - self._int_event_type = self._int_event_type[mask] - self._ang_err = self._ang_err[mask] + self._energies = self._energies[mask] + self._coords = self._coords[mask] + self._unit_vectors = self._unit_vectors[mask] + self._types = self._types[mask] + self._int_types = self._int_types[mask] + self._ang_errs = self._ang_errs[mask] self._mjd = self._mjd[mask] + def remove(self, i: int) -> None: + """ + Remove the event at index i + :param i: Event index + :type i: int + """ + self._energies = np.delete(self._energies, i) + self._coords = np.delete(self._coords, i) + self._unit_vectors = np.delete(self._unit_vectors, i, axis=0) + self._types = np.delete(self._types, i) + self._ang_errs = np.delete(self._ang_errs, i) + self._mjd = np.delete(self._mjd, i) + def to_file( - self, - path: Path, - append: bool = False, - group_name: str | None = None, - overwrite: bool = False - ) -> Path: + self, + path: Path, + append: bool = False, + group_name: str | None = None, + overwrite: bool = False, + ) -> Path: """Write events to file. Keyworded arguments control behaviour with existing files. If not overwrite, but `path` exists, append a timestamp to the file name. @@ -187,12 +219,12 @@ def to_file( :rtype: Path """ - self._file_keys = ["energy", "unit_vector", "event_type", "ang_err", "mjd"] + self._file_keys = ["energies", "unit_vectors", "types", "ang_errs", "mjd"] self._file_values = [ - self.energy.to(u.GeV).value, - self.unit_vector, - self.int_event_type, - self.ang_err.to(u.deg).value, + self.energies.to(u.GeV).value, + self.unit_vectors, + self.int_types, + self.ang_errs.to(u.deg).value, self.mjd.mjd, ] @@ -241,7 +273,7 @@ def to_file( def from_file( cls, filename: Path, - group_name:str = None, + group_name: str = None, ) -> Self: """Load events from .h5 file @@ -249,42 +281,53 @@ def from_file( :type filename: Path :param group_name: Name of events group, if provided when writing to file, defaults to None :type group_name: str, optional - """ + """ with h5py.File(filename, "r") as f: if group_name is None: events_folder = f["events"] else: events_folder = f[group_name] - energy = events_folder["energy"][()] * u.GeV - uv = events_folder["unit_vector"][()] - int_event_type = events_folder["event_type"][()] - event_type = np.array([Refrigerator.int2dm(_) for _ in int_event_type]) - ang_err = events_folder["ang_err"][()] * u.deg + energies = events_folder["energies"][()] * u.GeV + uv = events_folder["unit_vectors"][()] + int_types = events_folder["types"][()] + types = np.array([Refrigerator.int2dm(_) for _ in int_types]) + ang_errs = events_folder["ang_errs"][()] * u.deg # For backwards compatibility try: mjd = events_folder["mjd"][()] except KeyError: - mjd = [99.0] * len(energy) + mjd = [99.0] * len(energies) - coord = SkyCoord( + coords = SkyCoord( uv.T[0], uv.T[1], uv.T[2], representation_type="cartesian", frame="icrs" ) mjd = Time(mjd, format="mjd") - coord.representation_type = "spherical" - events = cls(energy, coord, event_type, ang_err, mjd) + coords.representation_type = "spherical" + events = cls(energies, coords, types, ang_errs, mjd) return events + @u.quantity_input + def apply_energy_cut(self, Emin: u.GeV, Emax=np.inf * u.GeV) -> None: + """Apply energy cuts to events + + :param Emin: Minimum allowed energy + :type Emin: u.GeV + :param Emax: Maximum allowed energy, defaults to np.inf*u.GeV + :type Emax: u.GeV, optional + """ + mask = (self.energies >= Emin) & (self.energies <= Emax) + self.select(mask) + @classmethod - def from_event_files(cls, *seasons: EventType) -> Self: + def from_event_files(cls, *seasons: EventType | str) -> Self: """ Load data of provided seasons. If none are provided, use all. :param seasons: Seasons to load - :returns: Event container :py:class:`icecube_data_reader.events.Events` """ @@ -310,12 +353,12 @@ def from_event_files(cls, *seasons: EventType) -> Self: data_interface = IceCubeData() data_interface.fetch(I3_14) - energy = [] - ra = [] - dec = [] + energies = [] + ras = [] + decs = [] mjd = [] - ang_err = [] - event_type = [] + ang_errs = [] + types = [] def _append_data(s, suffering: str = ""): data = np.loadtxt( @@ -325,30 +368,31 @@ def _append_data(s, suffering: str = ""): ) ) - energy.append(data[:, cls.energy_]) + energies.append(data[:, cls.energies_]) mjd.append(data[:, cls.mjd_]) - ra.append(data[:, cls.ra_]) - dec.append(data[:, cls.dec_]) - ang_err.append(data[:, cls.ang_err_]) - event_type.append(len(data[:, cls.energy_]) * [s]) + ras.append(data[:, cls.ras_]) + decs.append(data[:, cls.decs_]) + ang_errs.append(data[:, cls.ang_errs_]) + types.append(len(data[:, cls.energies_]) * [s]) for s in seasons: + s = Refrigerator.str2dm(s) if isinstance(s, str) else s if s == IC86: for suffering in suffixes: _append_data(s, suffering) else: _append_data(s) - energy = np.power(10, np.concatenate(energy)) << u.GeV + energies = np.power(10, np.concatenate(energies)) << u.GeV mjd = Time(np.concatenate(mjd), format="mjd") - ra = np.concatenate(ra) << u.deg - dec = np.concatenate(dec) << u.deg - ang_err = np.concatenate(ang_err) << u.deg - event_type = np.concatenate(event_type) - coord = SkyCoord(ra=ra, dec=dec, frame="icrs") - - events = cls(energy, coord, event_type, ang_err, mjd) - events._ra = ra - events._dec = dec + ras = np.concatenate(ras) << u.deg + decs = np.concatenate(decs) << u.deg + ang_errs = np.concatenate(ang_errs) << u.deg + types = np.concatenate(types) + coords = SkyCoord(ra=ras, dec=decs, frame="icrs") + + events = cls(energies, coords, types, ang_errs, mjd) + events._ras = ras + events._decs = decs return events diff --git a/icecube_data_reader/irf/__init__.py b/icecube_data_reader/irf/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/icecube_data_reader/irf/effective_area.py b/icecube_data_reader/irf/effective_area.py new file mode 100644 index 0000000..213b7db --- /dev/null +++ b/icecube_data_reader/irf/effective_area.py @@ -0,0 +1,132 @@ +""" +Classes to organise the effective area of the IceCube detector for track events +""" + +from abc import ABC, abstractmethod +import numpy as np +import pandas as pd +import os +from astropy import units as u +from pathlib import Path +from typing import Self + + +from icecube_data_reader.event_types import Refrigerator, EventType +from icecube_data_reader.downloader import available_datasets, data_directory, I3_14 + +from astropy import units as u +from astropy.units.typing import QuantityLike + + +class EffectiveArea(ABC): + + + @classmethod + @abstractmethod + def load(cls): + """Load effective area object""" + + pass + + @property + def eff_area(self): + """ + 2D histogram of effective area values, + Etrue on axis 0 and cosz on axis 1 + """ + + return self._eff_area + + @property + def cosz_bin_edges(self): + """ + cos(zenith) bin edges corresponding to the + histogram in eff_area. + """ + + return self._cosz_bin_edges + + #@property + #def sin_dec_bin_edges(self): + # """ + # sin(dec) bin edges corresponding to the + # histogram in eff_area + # """ + # + # return self._sin_dec_bin_edges + + @property + def tE_bin_edges(self): + """ + True energy bin edges corresponding the the + histogram in eff_area. + """ + + return self._tE_bin_edges + + +class IceTrackDR2EffectiveArea(EffectiveArea): + + @u.quantity_input + def __init__( + self, + eff_area: u.m**2, + tE_bin_edges: u.GeV, + dec_bin_edges: u.rad, + season: EventType, + ): + + self._season = season + self._tE_bin_edges = tE_bin_edges + # this inverts the order of bins + self._cosz_bin_edges = np.sort(- np.sin(dec_bin_edges.to_value(u.rad))) + # hence we invert the order of elements along the cosz axis of eff_area + self._eff_area = np.flip(eff_area, axis=1) + #self._sin_dec_bin_edges = np.sin(dec_bin_edges.to_value(u.rad)) + + @classmethod + def load(cls, season: EventType, data_path: Path = data_directory) -> Self: + """Create effective area object + + :param season: Detector season of + :type season: EventType + :param data_path: Path to lookf or data files, defaults to data_directory + :type data_path: Path, optional + :return: Effective area instance + :rtype: :py:class:`icecube_data_reader.irf.effective_area.IceTrackDR2EffectiveArea` + """ + + season_string = str(season) + file = data_path / Path( + available_datasets[I3_14]["dir"] + ) / Path( + available_datasets[I3_14]["subdir"] + ) / Path( + f"irfs/{season_string}_effectiveArea.csv" + ) + + filelayout = ["Emin", "Emax", "DECmin", "DECmax", "Aeff"] + + output = pd.read_csv(file, comment="#", sep="\s+", names=filelayout).to_dict() + + true_energy_lower = np.array(list(set(output["Emin"].values()))) + true_energy_upper = np.array(list(set(output["Emax"].values()))) + + + dec_lower = np.array(list(set(output["DECmin"].values()))) + dec_upper = np.array(list(set(output["DECmax"].values()))) + + tE_bin_edges = np.sort(np.power(10, + np.unique(np.vstack((true_energy_lower, true_energy_upper))) + )) << u.GeV + + dec_bin_edges = np.sort(np.radians(np.unique(np.vstack((dec_lower, dec_upper))))) << u.rad + + eff_area = np.reshape( + np.array(list(output["Aeff"].values())) * 1e-4, + (len(dec_lower), len(true_energy_lower)), + ).T << u.m**2 + + aeff = cls(eff_area, tE_bin_edges, dec_bin_edges, season) + + return aeff diff --git a/icecube_data_reader/irf/irf.py b/icecube_data_reader/irf/irf.py new file mode 100644 index 0000000..665b9f7 --- /dev/null +++ b/icecube_data_reader/irf/irf.py @@ -0,0 +1,584 @@ +""" +Classes to organise energy and angular resolution of IceCube track events +""" + +from abc import ABC, abstractmethod +from pathlib import Path +from typing import Self +from itertools import pairwise +import numpy as np +from scipy import stats +import astropy.units as u +from astropy.coordinates import SkyCoord +from astropy.time import Time +from tqdm.notebook import tqdm +from icecube_data_reader.downloader import available_datasets, data_directory, I3_14 +from icecube_data_reader.event_types import EventType +from icecube_data_reader.utils.utils import DummyPDF +from icecube_data_reader.events import IceTrackDR2Events +from itertools import product + +import logging + +logger = logging.getLogger(__name__) +logger.setLevel(logging.DEBUG) + + +class InstrumentResponseFunction(ABC): + @classmethod + @abstractmethod + def load(cls): + pass + + +class EnergyResolution(ABC): + pass + + +class AngularResolution(ABC): + pass + + +class IceTracksDR2InstrumentResponseFunction( + InstrumentResponseFunction, EnergyResolution, AngularResolution +): + + _STACK = {} + + def __init__( + self, + data: np.ndarray, + season: EventType, + random_state: np.random.Generator = np.random.default_rng(seed=42), + ): + """ + DO NOT instantiate it via init, but rather through the class method `load` + + :param path: Path to smearing matrix + :type path: Path + :param season: Detector season + :type season: EventType + """ + + self._data = data + self._season = season + self._random = random_state + + self.etrue_idx = 0 + self.dec_idx = 2 + self.ereco_idx = 4 + self.psf_idx = 6 + self.ang_err_idx = 8 + + if season in self._STACK: + self.__dict__ = self.STACK[season].__dict__ + + @property + def random(self): + return self._random + + @random.setter + def random(self, val: np.random.Generator): + if not isinstance(val, np.random.Generator): + raise ValueError("random must be instance of `np.random.Generator`") + self._random = val + + @property + def data(self): + return self._data + + @property + def season(self): + return self._season + + def _post_init(self): + # Break naming convention because r and t are too close on the keyboard + self.recoE_bin_edges = np.empty( + (self.log_tE_bin_centers.size, self.sin_dec_bin_centers.size), + dtype=np.ndarray, + ) + # Create empty array for rv_histograms storing the energy resolution + # for each bin of true energy and true declination + self.recoE_hists = np.empty( + (self.log_tE_bin_centers.size, self.sin_dec_bin_centers.size), + dtype=np.ndarray, + ) + self.recoE_sampling = np.empty( + (self.log_tE_bin_centers.size, self.sin_dec_bin_centers.size), + dtype=stats.rv_histogram, + ) + # Use lists here for possibly different numbers of bins for each ereco/psf/angerr histogram + self.psf_hists = np.empty( + (self.log_tE_bin_centers.size, self.sin_dec_bin_centers.size), + dtype=np.ndarray, + ) + self.psf_bin_edges = np.empty( + (self.log_tE_bin_centers.size, self.sin_dec_bin_centers.size), + dtype=np.ndarray, + ) + self.psf_sampling = np.empty( + (self.log_tE_bin_centers.size, self.sin_dec_bin_centers.size), + dtype=np.ndarray, + ) + self.ang_err_hists = np.empty( + (self.log_tE_bin_centers.size, self.sin_dec_bin_centers.size), + dtype=np.ndarray, + ) + self.ang_err_bin_edges = np.empty( + (self.log_tE_bin_centers.size, self.sin_dec_bin_centers.size), + dtype=np.ndarray, + ) + self.ang_err_sampling = np.empty( + (self.log_tE_bin_centers.size, self.sin_dec_bin_centers.size), + dtype=np.ndarray, + ) + + self.faulty = [] + for c_e in range(self.log_tE_bin_centers.size): + for c_d, d_l in enumerate(self.dec_bin_edges[:-1]): + idx = np.argwhere( + (self.data[:, 0] == self.log_tE_bin_edges[c_e]) + * (self.data[:, 2] == d_l) + ).squeeze() + reduced = self.data[idx] + if reduced[:, -1].sum() == 0.0: + self.faulty.append((c_e, c_d)) + + @u.quantity_input + def create_IRF( + self, + dec: u.Quantity[u.deg] | None = None, + dec_range: tuple[u.Quantity[u.deg], u.Quantity[u.deg]] = ( + -90.0 * u.deg, + 90.0 * u.deg, + ), + show_progress: bool = True, + ) -> None: + """Create the entire IRF, i.e. energy and angular resolution. + + Selection by declination or declination range is possible. A single value of `dec` takes precedent + over a provided range. + + :param dec: Declination, defaults to None + :type dec: u.Quantity[u.deg] | None, optional + :param dec_range: Declination range, defaults to (-90. * u.deg, 90. * u.deg) + :type dec_range: tuple[u.Quantity[u.deg], u.Quantity[u.deg]], optional + :param show_progress: Display progress bar, defaults to True + :type show_progress: bool, optional + """ + + self.create_ang_res(dec, dec_range, show_progress) + + @u.quantity_input + def create_eres( + self, + dec: u.Quantity[u.deg] | None = None, + dec_range: tuple[u.Quantity[u.deg], u.Quantity[u.deg]] = ( + -90.0 * u.deg, + 90.0 * u.deg, + ), + show_progress: bool = True, + ) -> None: + """Create the energy resolution. + + Selection by declination or declination range is possible. A single value of `dec` takes precedent + over a provided range. + + :param dec: Declination, defaults to None + :type dec: u.Quantity[u.deg] | None, optional + :param dec_range: Declination range, defaults to (-90. * u.deg, 90. * u.deg) + :type dec_range: tuple[u.Quantity[u.deg], u.Quantity[u.deg]], optional + :param show_progress: Display progress bar, defaults to True + :type show_progress: bool, optional + """ + + if dec is not None: + dec_idx = np.digitize(dec.to_value(u.deg), self.dec_bin_edges) - 1 + dec_range = range(dec_idx, dec_idx + 1) + dec_size = 1 + else: + dec_min, dec_max = dec_range + dec_min_idx = np.digitize(dec_min.to_value(u.deg), self.dec_bin_edges) - 1 + dec_max_idx = np.digitize(dec_max.to_value(u.deg), self.dec_bin_edges) - 1 + dec_idx = (dec_min_idx, dec_max_idx) + dec_range = range(dec_min_idx, dec_max_idx) + dec_size = dec_max_idx - dec_min_idx + + for c_d, c_tE in tqdm( + product(dec_range, range(self.log_tE_bin_centers.size)), + disable=not show_progress, + desc="Energy resolution", + total=self.log_tE_bin_centers.size * dec_size, + ): + if isinstance( + self.recoE_sampling[c_tE][c_d], stats.rv_histogram + ) or isinstance(self.recoE_sampling[c_tE][c_d], DummyPDF): + continue + self._create_recoE_distribution(c_tE, c_d) + + @u.quantity_input + def create_ang_res( + self, + dec: u.Quantity[u.deg] | None = None, + dec_range: tuple[u.Quantity[u.deg], u.Quantity[u.deg]] = ( + -90.0 * u.deg, + 90.0 * u.deg, + ), + show_progress: bool = True, + ) -> None: + """Create angular resolution distributions. + The intermediate step of kinematic angle / PSF is irrelevant, + as it is not an observable. We skip this explicit simulation step by marginalising over it. + For each Etrue, DEC pair, the binning of ang_err is fixed. Hence we collect the + ang_err_bin_edges and find the fractional counts only for specific values of reconstructed energy. + If the required energy resolution has not been created, it will be done automatically. + + Selection by declination or declination range is possible. A single value of `dec` takes precedent + over a provided range. + + :param dec: Declination, defaults to None + :type dec: u.Quantity[u.deg] | None, optional + :param dec_range: Declination range, defaults to (-90. * u.deg, 90. * u.deg) + :type dec_range: tuple[u.Quantity[u.deg], u.Quantity[u.deg]], optional + :param show_progress: Display progress bar, defaults to True + :type show_progress: bool, optional + :raises AssertionError: If not all ang_err_bin_edges within one pair of Etrue and DEC + are the same, an AssertionError is raised + """ + + if dec is not None: + dec_idx = np.digitize(dec.to_value(u.deg), self.dec_bin_edges) - 1 + dec_range = range(dec_idx, dec_idx + 1) + dec_size = 1 + else: + dec_min, dec_max = dec_range + dec_min_idx = np.digitize(dec_min.to_value(u.deg), self.dec_bin_edges) - 1 + dec_max_idx = np.digitize(dec_max.to_value(u.deg), self.dec_bin_edges) - 1 + dec_idx = (dec_min_idx, dec_max_idx) + dec_range = range(dec_min_idx, dec_max_idx) + dec_size = dec_max_idx - dec_min_idx + + self.ang_err_bin_edges = np.zeros( + (self.log_tE_bin_centers.size, self.sin_dec_bin_centers.size, 21), + ) + self.ang_err_hists = np.zeros( + (self.log_tE_bin_centers.size, self.sin_dec_bin_centers.size, 20, 20), + ) + for c_d, c_tE in tqdm( + product(dec_range, range(self.log_tE_bin_centers.size)), + disable=not show_progress, + desc="Angular resolution", + total=self.log_tE_bin_centers.size * dec_size, + ): + if isinstance( + self.recoE_sampling[c_tE][c_d], stats.rv_histogram + ) or isinstance(self.recoE_sampling[c_tE][c_d], DummyPDF): + etrue_data = self.data[ + self.data[:, self.etrue_idx] == self.log_tE_bin_edges[c_tE] + ] + data = etrue_data[ + etrue_data[:, self.dec_idx] == self.dec_bin_edges[c_d] + ] + _, _, data = self._create_recoE_distribution(c_tE, c_d, return_data=True) + + all_ang_err_bins = np.empty( + (self.recoE_bin_edges[c_tE, c_d].size - 1,), dtype=np.ndarray + ) + for c_rE, rE in enumerate(self.recoE_bin_edges[c_tE, c_d][:-1]): + # self.ang_err_hists[c_tE, c_d] = np.empty((self.recoE_hists[c_tE, c_d].size), dtype=np.ndarray) + red_data = data[data[:, self.ereco_idx] == rE] + all_ang_err_bins[c_rE] = np.unique( + red_data[:, self.ang_err_idx : self.ang_err_idx + 2].flatten() + ) + ang_err_counts = np.zeros(all_ang_err_bins[c_rE].size - 1) + for c_ar, ar in enumerate(all_ang_err_bins[c_rE][:-1]): + ang_err_counts[c_ar] = red_data[ + red_data[:, self.ang_err_idx] == ar, -1 + ].sum() + hist = ang_err_counts.copy() + self.ang_err_hists[c_tE, c_d, c_rE] = ( + hist / (hist * np.diff(all_ang_err_bins[c_rE])).sum() + ) + for low, high in pairwise(all_ang_err_bins): + if not np.all(low == high): + # Has been tested in a notebook, should be fine! + raise AssertionError( + "Not all ang_err bins are the same! Investigate manually and fix me." + ) + + self.ang_err_bin_edges[c_tE, c_d] = all_ang_err_bins[0].copy() + + @classmethod + def load(cls, season: EventType) -> Self: + """Create energy resolution object for provided season + + :param season: Season + :type season: EventType + :return: Energy resolution + :rtype: :py:class:`IceTrackDR2EnergyResolution` + """ + + path = ( + Path(data_directory) + / Path(available_datasets[I3_14]["dir"]) + / Path(available_datasets[I3_14]["subdir"]) + / Path("irfs") + / Path(f"{str(season)}_smearing.csv") + ) + + data = np.loadtxt(path) + season = season + + # Extract true energy bins and declination bins, fixed for all Ereco distributions + log_tE_bin_edges = np.sort(np.unique(data[:, 0:2].flatten())) + log_tE_bin_centers = log_tE_bin_edges[:-1] + np.diff(log_tE_bin_edges) / 2 + tE_bin_edges = np.power(10, log_tE_bin_edges) << u.GeV + dec_bin_edges = np.sort(np.unique(data[:, 2:4].flatten())) + + # use log binning for angular quantities + data[:, 6:-1] = np.log10(data[:, 6:-1]) + irf = cls(data, season) + irf.log_tE_bin_edges = log_tE_bin_edges + irf.log_tE_bin_centers = log_tE_bin_centers + irf.tE_bin_edges = tE_bin_edges + irf.dec_bin_edges = dec_bin_edges + irf.sin_dec_bin_edges = np.sin(np.deg2rad(dec_bin_edges)) + irf.sin_dec_bin_centers = ( + irf.sin_dec_bin_edges[:-1] + irf.sin_dec_bin_edges[1:] + ) / 2 + + irf._post_init() + + return irf + + @u.quantity_input + def sample_energy( + self, + coord: SkyCoord, + Etrue: u.GeV, + N: int = 1, + ) -> np.ndarray: + """Sample reco energy + + :param coord: Source coordinate, + assumes only one coordinate per function call + :type coord: SkyCoord + :param Etrue: True neutrino energy + :type Etrue: u.GeV + :param N: Number of events to sample if coord and Etrue are single values, defaults to 1 + :type N: int, optional + :return: Array of reconstructed energies + :rtype: np.ndarray + """ + + _, _, recoE_out = self._sample_energy(coord, Etrue, N) + return recoE_out + + @u.quantity_input + def sample(self, coord: SkyCoord, Etrue: u.GeV, N: int = 1) -> IceTrackDR2Events: + """Sample reco energy + + :param coord: Source coordinate, + assumes only one coordinate per function call + :type coord: SkyCoord + :param Etrue: True neutrino energy + :type Etrue: u.GeV + :param N: Number of events to sample if coord and Etrue are single values, defaults to 1 + :type N: int, optional + :return: + :rtype: np.ndarray + """ + + tE_idx, c_d, recoE = self._sample_energy(coord, Etrue, N) + set_e = np.unique(tE_idx) + + recoE = np.atleast_1d(recoE) + ang_errs_out = np.zeros(recoE.size) + + for idx_e in set_e: + _index_e = np.atleast_1d(np.argwhere(idx_e == tE_idx).squeeze()) + reco_idxs = ( + np.digitize(recoE[_index_e], self.recoE_bin_edges[idx_e, c_d]) - 1 + ) + set_rE = np.unique(reco_idxs) + for idx_rE in set_rE: + random = stats.rv_histogram( + ( + self.ang_err_hists[idx_e, c_d, idx_rE], + self.ang_err_bin_edges[idx_e, c_d], + ), + density=True, + ) + _index_rE = np.atleast_1d(np.argwhere(idx_rE == reco_idxs).squeeze()) + rvs = random.rvs(size=_index_rE.size, random_state=self.random) + # we sample log10(angle/deg), need the angle in rad + # for sampling from the Rayleigh distribution + ang_errs_out[_index_e[_index_rE]] = np.deg2rad(np.power(10, rvs)) + + deflection_angles = np.atleast_1d( + stats.rayleigh.rvs(scale=ang_errs_out, random_state=self.random) * u.rad + ) + ang_errs_out = (ang_errs_out * u.rad).to(u.deg) + # Deflecte like we do in stan: sample rotation axis orthonormal to the event + # sample angle `theta` by which we rotate from Rayleigh dist with sampled ang_err as its sigma + # rotate the initial direction/coord by `theta` around rotation axis + + coord.representation_type = "cartesian" + direction = np.array([coord.x, coord.y, coord.z]) + coord.representation_type = "spherical" + new_directions = np.zeros((3, N)) + for c, angle in enumerate(deflection_angles): + rot_axis = self._sample_orthonormal(direction) + new_directions[:, c] = self._rotate_around_vector( + direction, rot_axis, angle + ) + new_coords = SkyCoord( + x=new_directions[0], + y=new_directions[1], + z=new_directions[2], + frame="icrs", + representation_type="cartesian", + ) + new_coords.representation_type = "spherical" + events = IceTrackDR2Events( + np.power(10, recoE) * u.GeV, + new_coords, + np.full(N, self.season), + ang_errs_out, + Time(np.full(N, 99.0), format="mjd"), + ) + return events + + def _sample_orthonormal(self, x: np.ndarray) -> np.ndarray: + """Sample a vector that is orthonormal to the input + + :param x: Input array + :type x: np.ndarray + :return: Vector orthonormal to input + :rtype: np.ndarray + """ + v = stats.norm().rvs(size=x.shape, random_state=self.random) + projected = x * np.dot(x, v) / np.linalg.norm(x) + ortho = v - projected + orthonormal = ortho / np.linalg.norm(ortho) + return orthonormal + + @u.quantity_input + def _rotate_around_vector( + self, rotatee: np.ndarray, axis: np.ndarray, theta: u.rad + ) -> np.ndarray: + """Rotate a vector around a second vector by an angle""" + + theta = theta.to_value(u.rad) + return ( + axis * np.dot(rotatee, axis) + + np.cos(theta) * np.cross(np.cross(axis, rotatee), axis) + + np.sin(theta) * np.cross(axis, rotatee) + ) + + def _sample_energy( + self, coord: SkyCoord, Etrue: u.GeV, N: int = 1 + ) -> tuple[np.ndarray, int, np.ndarray]: + """Sample reco energy of events + + :param coord: Source coordinate, + assumes only one coordinate per function call + :type coord: SkyCoord + :param Etrue: True neutrino energy + :type Etrue: u.GeV + :param N: Number of events to sample if coord and Etrue are single values, defaults to 1 + :type N: int, optional + :return: Etrue indices, declination index, array of reconstructed energies + :rtype: tuple[np.ndarray, int, np.ndarray] + """ + + if Etrue.shape == () and N > 1: + Etrue = np.full(N, Etrue.to_value(u.GeV)) + else: + Etrue = np.atleast_1d(Etrue.to_value(u.GeV)) + coord.representation_type = "spherical" + dec = coord.dec + c_d = np.digitize(dec.deg, self.dec_bin_edges) - 1 + + coord.representation_type = "cartesian" + coord.representation_type = "spherical" + + log_tE = np.log10(Etrue) + tE_idx = np.digitize(log_tE, self.log_tE_bin_edges) - 1 + + recoE_out = np.zeros(Etrue.shape) + + set_e = np.unique(tE_idx) + for idx_e in set_e: + _index_e = np.atleast_1d(np.argwhere(idx_e == tE_idx).squeeze()) + recoE = self.recoE_sampling[idx_e][c_d].rvs( + size=_index_e.size, random_state=self.random + ) + recoE_out[_index_e] = recoE + + if recoE_out.size == 1: + return tE_idx, c_d, recoE_out[0] + return tE_idx, c_d, recoE_out + + def _create_recoE_distribution( + self, + c_e: int, + c_d: int, + return_data: bool = False, + ) -> tuple[np.ndarray, ...]: + """Creates the reconstructed energy distribution for given true + energy and declination by marginalising over the kinematic (PSF) angle + and angular error. + + :param c_e: Index of true energy bin + :type c_e: int + :param c_d: Index of declination bin + :type c_d: int + :param return_data: Set to true if the relevant entries of the smearing matrix + are to be returned additionally, defaults to False + :param data: Relevant entries (i.e. for true energy, declination) + of the smearing matrix, defaults to None + :type return_data: bool, optional + :return: Tuple of fractional counts per bin and bin edges, optional relevant entries + of smearing matrix + :rtype: tuple[np.ndarray, ...] + """ + + if (c_e, c_d) in self.faulty: + self.recoE_bin_edges[c_e, c_d] = np.arange(21) + self.recoE_hists[c_e, c_d] = np.zeros(20) + self.recoE_sampling[c_e, c_d] = DummyPDF() + if return_data: + return None, None, None + return None, None + # Get entries at relevant true energy and declination + reduced_data = self.data[ + self.data[:, self.etrue_idx] == self.log_tE_bin_edges[c_e] + ].copy() + reduced_data = reduced_data[ + reduced_data[:, self.dec_idx] == self.dec_bin_edges[c_d] + ] + + # Create bin edges of reco energy + bins = np.sort( + np.unique(reduced_data[:, self.ereco_idx : self.ereco_idx + 2].flatten()) + ) + + frac_counts = np.zeros(bins.size - 1) + + # marginalise over angular quantities + for c_b, b in enumerate(bins[:-1]): + frac_counts[c_b] = np.sum( + reduced_data[b == reduced_data[:, self.ereco_idx], -1] + ) + + self.recoE_sampling[c_e][c_d] = stats.rv_histogram( + (frac_counts, bins), density=False + ) + self.recoE_bin_edges[c_e][c_d] = bins + # summed = frac_counts.sum() + self.recoE_hists[c_e][c_d] = frac_counts / (frac_counts * np.diff(bins)).sum() + + if return_data: + return frac_counts, bins, reduced_data + return frac_counts, bins diff --git a/icecube_data_reader/lifetime.py b/icecube_data_reader/lifetime.py index 0604413..20ff5cb 100644 --- a/icecube_data_reader/lifetime.py +++ b/icecube_data_reader/lifetime.py @@ -7,8 +7,22 @@ import os from abc import ABC from astropy import units as u -from icecube_data_reader.downloader import data_directory, I3_14, available_datasets, IceCubeData -from icecube_data_reader.event_types import IC40, IC59, IC79, IC86, suffixes, EventType +from astropy.time import Time +from icecube_data_reader.downloader import ( + data_directory, + I3_14, + available_datasets, + IceCubeData, +) +from icecube_data_reader.event_types import ( + IC40, + IC59, + IC79, + IC86, + suffixes, + EventType, + Refrigerator, +) import logging @@ -46,31 +60,55 @@ def lifetime_from_mjd( for s in self._data.keys(): # Query histograms for fraction of total lifetime in a season # multiply by total lifetime in a season to get appropriate value - time = (self._dists[s].cdf(mjd_max) - self._dists[s].cdf(mjd_min)) * self._lifetimes[s] + time = ( + self._dists[s].cdf(mjd_max) - self._dists[s].cdf(mjd_min) + ) * self._lifetimes[s] # set atol to 1e-9 days, so we are below the time resolution of event mjd (1e-8 days) if squeeze and np.isclose(time.to_value(u.d), 0.0, atol=1e-9): - time = 0 * u.yr + continue output[s] = time return output - def lifetime_from_season(self, *seasons) -> dict[EventType | u.quantity.Quantity[u.yr]]: + def mjd_from_season(self, season: EventType | str) -> tuple[float, float]: + """Return MJD_min, MJD_max for provided season + + :param season: Event type, either the EventType class or str(EventType) + :type season: EventType | str + :return: Tuple of MJD_min, MJD_max + :rtype: tuple[float, float] + """ + + for c, v in enumerate([IC40, IC59, IC79, IC86]): + if str(v) == str(season): + break + else: + raise ValueError(f"Season {season} is not implemented.") + + return self._times[c, 0], self._times[c, 1] + + def lifetime_from_season( + self, *seasons: EventType | str + ) -> dict[EventType | u.quantity.Quantity[u.yr]]: """Compute lifetime of given seasons - :param seasons: Seasons to calculate lifetimes of + :param seasons: Seasons to calculate lifetimes of, either EventType or str(EventType) + :type seasons: EventType | str :return: Dictionary with detector season: duration in astropy.units.yr :rtype: dict[EventType|u.quantity.Quantity[u.yr]] """ output = {} for s in seasons: + if isinstance(s, str): + s = Refrigerator.str2dm(s) output[s] = self._lifetimes[s] return output -class DR2LifeTime(LifeTime): +class IceTrackDR2LifeTime(LifeTime): def __init__(self): directory = available_datasets[I3_14]["dir"] @@ -134,3 +172,17 @@ def __init__(self): on_off[::2] = 1.0 # density=True for on_off to be treated as density self._dists[s] = stats.rv_histogram((on_off, bins), density=True) + + def draw_event_mjd( + self, event_numbers: dict[EventType, int] + ) -> dict[EventType, Time]: + """Draw event MJDs for scrambling the arrival times of events + TODO: add min/max mjd + + :param event_numbers: Dict of event type and requested numbers + :type event_numbers: dict[EventType, int] + :return: Dictionary of event types and new MJDs + :rtype: dict[EventType, Time] + """ + + raise NotImplementedError() diff --git a/icecube_data_reader/utils/utils.py b/icecube_data_reader/utils/utils.py new file mode 100644 index 0000000..c636465 --- /dev/null +++ b/icecube_data_reader/utils/utils.py @@ -0,0 +1,62 @@ +import numpy as np + + +class DummyPDF: + def __init__(self): + pass + + def pdf(self, x): + if isinstance(x, np.ndarray): + return np.zeros_like(x) + else: + return 0.0 + + def cdf(self, x): + if isinstance(x, np.ndarray): + return np.zeros_like(x) + else: + return 0.0 + + def rvs(self, size=1, random_state=42): + raise NotImplementedError("Dummies cannot be sampled from!") + + +class ddict(dict): + """ + Modified dictionary class, derived from `dict`. + Used to nest dictionaries without having to write [] all the time when adding or calling. + """ + + def __init__(self): + super().__init__() + + def add(self, value, *keys): + """ + Add value to chain of keys. + Careful: this may overwrite existing values! + :param value: Value to be added + :param keys: Tuple containing ordered keys behind which the value should be added. + """ + # TODO: protect from overwriting + + temp = self + for key in keys[:-1]: + try: + temp[key] + except KeyError: + temp[key] = {} + finally: + temp = temp[key] + temp[keys[-1]] = value + + def __call__(self, *keys): + """ + Call value of nested dicts. + :param keys: Tuple of ordered keys whose value should be returned + :return: value behind tuple of keys + """ + + temp = self + for key in keys: + temp = temp[key] + return temp diff --git a/pyproject.toml b/pyproject.toml index 09ff39e..7c6bd02 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,6 +25,8 @@ dependencies = [ "tqdm", "requests", "requests_cache", + "ipywidgets", + "pandas", ] [project.optional-dependencies] diff --git a/tests/test_aeff.py b/tests/test_aeff.py new file mode 100644 index 0000000..06e90cd --- /dev/null +++ b/tests/test_aeff.py @@ -0,0 +1,6 @@ +from icecube_data_reader.irf.effective_area import IceTrackDR2EffectiveArea +from icecube_data_reader.event_types import IC86 + + +def test_load(): + aeff = IceTrackDR2EffectiveArea.load(IC86) diff --git a/tests/test_downloader.py b/tests/test_downloader.py index 88dc635..7685376 100644 --- a/tests/test_downloader.py +++ b/tests/test_downloader.py @@ -1,8 +1,8 @@ from icecube_data_reader.downloader import IceCubeData, I3_10 - +import pytest data = IceCubeData() - +@pytest.mark.skip("convenience") def test_file_download(output_directory): data.fetch(I3_10, write_to=output_directory) diff --git a/tests/test_events.py b/tests/test_events.py index 5d3c607..2d09eb2 100644 --- a/tests/test_events.py +++ b/tests/test_events.py @@ -12,12 +12,14 @@ def test_event_number(): events = IceTrackDR2Events.from_event_files() assert events.N == 1643355 + @pytest.fixture def test_saving(output_directory): events = IceTrackDR2Events.from_event_files(IC40) path = events.to_file(Path(output_directory) / "ic40_events.h5") return (path, events) + def test_overwriting(output_directory, test_saving): path, events = test_saving time_appended = events.to_file(path) @@ -28,7 +30,10 @@ def test_loading(test_saving): path, ic40 = test_saving loaded = IceTrackDR2Events.from_file(path) - assert np.all(np.isclose(loaded.energy.to_value(u.GeV), ic40.energy.to_value(u.GeV))) + assert np.all( + np.isclose(loaded.energies.to_value(u.GeV), ic40.energies.to_value(u.GeV)) + ) + def test_selecting(test_saving): idx = 5 @@ -37,12 +42,20 @@ def test_selecting(test_saving): mask = np.zeros(N, dtype=bool) mask[idx] = True - e = events.energy[idx].to_value(u.GeV) - et = events.int_event_type[idx] + e = events.energies[idx].to_value(u.GeV) + et = events.int_types[idx] events.select(mask) - assert pytest.approx(events.energy[0].to_value(u.GeV)) == e - assert et == events.int_event_type[0] + assert pytest.approx(events.energies[0].to_value(u.GeV)) == e + assert et == events.int_types[0] + + +def test_removing(test_saving): + idx = 5 + _, events = test_saving + N = events.N + events.remove(idx) + assert events.N == N - 1 def test_erroneous_selecting(test_saving): @@ -53,6 +66,17 @@ def test_erroneous_selecting(test_saving): mask[idx] = True with pytest.raises(ValueError, match="Mask needs to be of the same length as N."): - mask = np.zeros(N+1, dtype=bool) + mask = np.zeros(N + 1, dtype=bool) mask[1] = 1 - events.select(mask) \ No newline at end of file + events.select(mask) + + +def test_energy_cut(test_saving): + _, events = test_saving + + Emin = 5e3 * u.GeV + Emax = 7e4 * u.GeV + + events.apply_energy_cut(Emin=Emin, Emax=Emax) + assert np.all(events.energies <= Emax) + assert np.all(events.energies >= Emin) diff --git a/tests/test_irf.py b/tests/test_irf.py new file mode 100644 index 0000000..60467a2 --- /dev/null +++ b/tests/test_irf.py @@ -0,0 +1,64 @@ +import pytest + +from icecube_data_reader.irf.irf import IceTracksDR2InstrumentResponseFunction +from icecube_data_reader.event_types import IC86 +import numpy as np +import astropy.units as u +from astropy.coordinates import SkyCoord + + +@pytest.fixture +def irf(): + irf = IceTracksDR2InstrumentResponseFunction.load(IC86) + irf.create_eres(show_progress=False) + return irf + + +def test_eres(irf): + + Etrue = 10**2.25 * u.GeV + et_idx = np.digitize(Etrue, irf.tE_bin_edges) - 1 + + coord = SkyCoord(ra=90 * u.deg, dec=2 * u.deg, frame="icrs") + dec_idx = np.digitize(coord.dec.deg, irf.dec_bin_edges) - 1 + + recoE = irf.sample_energy(coord, Etrue, N=100_000) + bins = irf.recoE_bin_edges[et_idx][dec_idx] + + pdf = irf.recoE_hists[et_idx][dec_idx] + + n, _ = np.histogram(recoE, bins, density=True) + cutoff = pdf >= 1e-2 + + assert np.all(pytest.approx(pdf[cutoff], abs=1e-2) == n[cutoff]) + + +def test_orthonormal(irf): + vec = np.array([0.0, 0.0, 1.0]) + orthonormal = irf._sample_orthonormal(vec) + assert pytest.approx(np.dot(vec, orthonormal)) == 0.0 + assert pytest.approx(np.linalg.norm(orthonormal)) == 1.0 + + +def test_rotation(irf): + vec = np.array([0.0, 0.0, 1.0]) + rotated = irf._rotate_around_vector( + vec, np.array([1.0, 0.0, 0.0]), np.pi / 2 * u.rad + ) + assert pytest.approx(np.dot(vec, rotated)) == 0.0 + assert pytest.approx(rotated[0]) == 0 + assert pytest.approx(rotated[1]) == -1.0 + assert pytest.approx(rotated[2]) == 0.0 + + +def test_ang_res(irf): + coord = SkyCoord(ra=90 * u.deg, dec=5 * u.deg) + irf.create_ang_res(dec=coord.dec, show_progress=False) + + N = 100 + Etrue = 4e4 * u.GeV + events = irf.sample(coord, Etrue, N=N) + assert events.N == 100 + assert np.unique(events.coords.ra).size == 100 + assert np.unique(events.coords.dec).size == 100 + assert np.unique(events.coords.separation(coord)).size == 100 diff --git a/tests/test_lifetime.py b/tests/test_lifetime.py index 603701d..3be4cb7 100644 --- a/tests/test_lifetime.py +++ b/tests/test_lifetime.py @@ -1,10 +1,10 @@ -from icecube_data_reader.lifetime import DR2LifeTime +from icecube_data_reader.lifetime import IceTrackDR2LifeTime from icecube_data_reader.event_types import DR2, IC40 import numpy as np from astropy import units as u import pytest -lt = DR2LifeTime() +lt = IceTrackDR2LifeTime() def test_total_lt(): @@ -22,3 +22,15 @@ def test_lt_from_mjd(): obs_time = lt.lifetime_from_mjd(mjd_min, mjd_max)[IC40].to_value(u.d) comparison = np.sum(np.diff(lt._data[IC40])[:2]) assert pytest.approx(obs_time) == comparison + + +def test_mjd_from_season(): + lt = IceTrackDR2LifeTime() + for s in DR2.available_irfs: + mjd_min, mjd_max = lt.mjd_from_season(s) + assert mjd_min == lt._data[s][0, 0] + assert mjd_max == lt.data[s][-1, 1] + + with pytest.raises(ValueError) as e: + lt.mjd_from_season("potato") + assert "potato" in e