From 540def95c2b11a844e5da54742b23b12a27eb0e2 Mon Sep 17 00:00:00 2001 From: Julian Kuhlmann Date: Mon, 25 May 2026 12:50:39 +0200 Subject: [PATCH 01/39] add empty files --- icecube_data_reader/irf/__init__.py | 0 icecube_data_reader/irf/effective_area.py | 3 +++ icecube_data_reader/irf/irf.py | 3 +++ 3 files changed, 6 insertions(+) create mode 100644 icecube_data_reader/irf/__init__.py create mode 100644 icecube_data_reader/irf/effective_area.py create mode 100644 icecube_data_reader/irf/irf.py diff --git a/icecube_data_reader/irf/__init__.py b/icecube_data_reader/irf/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/icecube_data_reader/irf/effective_area.py b/icecube_data_reader/irf/effective_area.py new file mode 100644 index 0000000..6ec62d2 --- /dev/null +++ b/icecube_data_reader/irf/effective_area.py @@ -0,0 +1,3 @@ +""" +Classes to organise the effective area of the IceCube detector for track events +""" \ No newline at end of file diff --git a/icecube_data_reader/irf/irf.py b/icecube_data_reader/irf/irf.py new file mode 100644 index 0000000..f3af570 --- /dev/null +++ b/icecube_data_reader/irf/irf.py @@ -0,0 +1,3 @@ +""" +Classes to organise energy and angular resolution of IceCube track events +""" \ No newline at end of file From e9a03a8ad61edc13e05d88ee5343cc0d9b8fbf50 Mon Sep 17 00:00:00 2001 From: Julian Kuhlmann Date: Mon, 25 May 2026 21:00:24 +0200 Subject: [PATCH 02/39] effective area draft --- icecube_data_reader/irf/effective_area.py | 128 +++++++++++++++++++++- 1 file changed, 127 insertions(+), 1 deletion(-) diff --git a/icecube_data_reader/irf/effective_area.py b/icecube_data_reader/irf/effective_area.py index 6ec62d2..e3736ef 100644 --- a/icecube_data_reader/irf/effective_area.py +++ b/icecube_data_reader/irf/effective_area.py @@ -1,3 +1,129 @@ """ Classes to organise the effective area of the IceCube detector for track events -""" \ No newline at end of file +""" + +from abc import ABC, abstractmethod +import numpy as np +import pandas as pd +import os +from astropy import units as u +from pathlib import Path +from typing import Self + + +from icecube_data_reader.event_types import Refrigerator, EventType +from icecube_data_reader.downloader import available_datasets, data_directory, I3_14 + +from astropy import units as u +from astropy.units.typing import QuantityLike + + +class EffectiveArea(ABC): + + + @classmethod + @abstractmethod + def load(cls): + """Load effective area object""" + + pass + + @property + def eff_area(self): + """ + 2D histogram of effective area values, + Etrue on axis 0 and cosz on axis 1 + """ + + return self._eff_area + + @property + def cosz_bin_edges(self): + """ + cos(zenith) bin edges corresponding to the + histogram in eff_area. + """ + + return self._cosz_bin_edges + + @property + def sin_dec_bin_edges(self): + """ + sin(dec) bin edges corresponding to the + histogram in eff_area + """ + + return self._sin_dec_bin_edges + + @property + def tE_bin_edges(self): + """ + True energy bin edges corresponding the the + histogram in eff_area. + """ + + return self._tE_bin_edges + + +class IceTrackDR2EffectiveArea(EffectiveArea): + + @u.quantity_input + def __init__( + self, + eff_area: u.m**2, + tE_bin_edges: u.GeV, + dec_bin_edges: u.rad, + season: EventType, + ): + + self._tE_bin_edges = tE_bin_edges + self._eff_area = eff_area + self._sin_dec_bin_edges = np.sin(dec_bin_edges.to_value(u.rad)) + self._cosz_bin_edges = - np.sin(dec_bin_edges.to_value(u.rad)) + + @classmethod + def load(cls, season: EventType, data_path: Path = data_directory) -> Self: + """Create effective area object + + :param season: Detector season of + :type season: EventType + :param data_path: Path to lookf or data files, defaults to data_directory + :type data_path: Path, optional + :return: Effective area instance + :rtype: :py:class:`icecube_data_reader.irf.effective_area.IceTrackDR2EffectiveArea` + """ + + season_string = str(season) + file = data_directory / Path( + available_datasets[I3_14]["dir"] + ) / Path( + available_datasets[I3_14]["subdir"] + ) / Path( + f"irfs/{season_string}_effectiveArea.csv" + ) + + filelayout = ["Emin", "Emax", "DECmin", "DECmax", "Aeff"] + + output = pd.read_csv(file, comment="#", sep="\s+", names=filelayout).to_dict() + + true_energy_lower = np.array(list(set(output["Emin"].values()))) + true_energy_upper = np.array(list(set(output["Emax"].values()))) + + + dec_lower = np.array(list(set(output["DECmin"].values()))) + dec_upper = np.array(list(set(output["DECmax"].values()))) + + tE_bin_edges = np.sort(np.power(10, + np.unique(np.vstack((true_energy_lower, true_energy_upper))) + )) << u.GeV + + dec_bin_edges = np.sort(np.radians(np.unique(np.vstack((dec_lower, dec_upper))))) << u.rad + + eff_area = np.reshape( + np.array(list(output["Aeff"].values())) * 1e-4, + (len(dec_lower), len(true_energy_lower)), + ).T << u.m**2 + + aeff = cls(eff_area, tE_bin_edges, dec_bin_edges, season) + + return aeff From 2edcd01ac0671c37f7ee0970237dede6888aaa8e Mon Sep 17 00:00:00 2001 From: Julian Kuhlmann Date: Wed, 27 May 2026 13:46:12 +0200 Subject: [PATCH 03/39] first draft for eres --- icecube_data_reader/irf/irf.py | 135 ++++++++++++++++++++++++++++++++- 1 file changed, 134 insertions(+), 1 deletion(-) diff --git a/icecube_data_reader/irf/irf.py b/icecube_data_reader/irf/irf.py index f3af570..c721a3b 100644 --- a/icecube_data_reader/irf/irf.py +++ b/icecube_data_reader/irf/irf.py @@ -1,3 +1,136 @@ """ Classes to organise energy and angular resolution of IceCube track events -""" \ No newline at end of file +""" + +from abc import ABC, abstractmethod +from pathlib import Path +from typing import Self +from itertools import pairwise +import numpy as np +from scipy import stats +import astropy.units as u + +from icecube_data_reader.downloader import available_datasets, data_directory, I3_14 +from icecube_data_reader.event_types import EventType + +import logging + +logger = logging.getLogger(__name__) +logger.setLevel(logging.DEBUG) + + +class EnergyResolution(ABC): + pass + + @classmethod + @abstractmethod + def load(cls): + pass + + +class IceTrackDR2EnergyResolution(EnergyResolution): + + def __init__(self, path: Path, season: EventType): + """ + :param path: Path to smearing matrix + :type path: Path + :param season: Detector season + :type season: EventType + """ + + self.data = np.loadtxt(path) + self.season = season + + # Extract true energy bins and declination bins, fixed for all Ereco distributions + self.tE_bin_edges = np.sort(np.array(list(set(self.data[:, 0:2].flatten())))) + self.dec_bin_edges = np.sort(np.array(list(set(self.data[:, 2:4].flatten())))) + self.sin_dec_bin_edges = np.sin(self.dec_bin_edges) + + logging.debug(f"True energy bin edges: {self.tE_bin_edges}") + logging.debug(f"Dec bin edges: {self.dec_bin_edges}") + + # Break naming convention because r and t are too close on the keyboard + self.recoE_bin_edges = np.empty( + (self.tE_bin_edges.size - 1, self.sin_dec_bin_edges.size - 1), + dtype=np.ndarray, + ) + # Create empty array for rv_histograms storing the energy resolution + # for each bin of true energy and true declination + self.reco_energy_hists = np.empty( + (self.tE_bin_edges.size - 1, self.sin_dec_bin_edges.size - 1), + dtype=stats.rv_histogram, + ) + + @u.quantity_input + def create_eres_at_dec(self, dec: u.rad) -> None: + """Create energy resolution histograms at provided declination + + :param dec: Declination + :type dec: u.rad + """ + dec_idx = np.digitize(dec.to_value(u.deg), self.dec_bin_edges) - 1 + for c_tE, (true_E_low, true_E_high) in enumerate(pairwise(self.tE_bin_edges)): + frac_counts, bins = self.marginalise_over_angles(c_tE, dec_idx) + # Set density=False because smearing matrix provides unnormalised fractional counts + hist = stats.rv_histogram((frac_counts, bins), density=False) + self.reco_energy_hists[c_tE, dec_idx] = hist + self.recoE_bin_edges[c_tE, dec_idx] = bins + + @classmethod + def load(cls, season: EventType) -> Self: + """Create energy resolution object for provided season + + :param season: Season + :type season: EventType + :return: Energy resolution + :rtype: :py:class:`IceTrackDR2EnergyResolution` + """ + + path = ( + Path(data_directory) + / Path(available_datasets[I3_14]["dir"]) + / Path(available_datasets[I3_14]["subdir"]) + / Path("irfs") + / Path(f"{str(season)}_smearing.csv") + ) + + return cls(path, season) + + def marginalise_over_angles( + self, c_e: int, c_d: int + ) -> tuple[np.ndarray, np.ndarray]: + """Creates the reconstructed energy distribution for given true + energy and declination by marginalising over the kinematic (PSF) angle + and angular error. + + :param c_e: Index of true energy bin + :type c_e: int + :param c_d: Index of true declination (conversly sin(declination) bin + :type c_d: int + :return: Tuple of fractional counts per bin and bin edges + :rtype: tuple[np.ndarray, np.ndarray] + """ + + ereco_idx = 4 + + data = self.data[self.data[:, 0] == self.tE_bin_edges[c_e]] + reduced_data = data[data[:, 2] == self.dec_bin_edges[c_d]] + + bins = np.array( + sorted( + list( + set(reduced_data[:, ereco_idx]).union( + set(reduced_data[:, ereco_idx + 1]) + ) + ) + ) + ) + + frac_counts = np.zeros(bins.shape[0] - 1) + + # marginalise over angular quantities + for c_b, b in enumerate(bins[:-1]): + indices = np.nonzero(np.isclose(b, reduced_data[:, ereco_idx])) + frac_counts[c_b] = np.sum(reduced_data[indices, -1]) + + return frac_counts, bins From eab022652881a1b3d8133b3c56f5221666654dda Mon Sep 17 00:00:00 2001 From: Julian Kuhlmann Date: Wed, 27 May 2026 13:56:15 +0200 Subject: [PATCH 04/39] rename attribute --- icecube_data_reader/irf/irf.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/icecube_data_reader/irf/irf.py b/icecube_data_reader/irf/irf.py index c721a3b..c7d4a4a 100644 --- a/icecube_data_reader/irf/irf.py +++ b/icecube_data_reader/irf/irf.py @@ -5,7 +5,6 @@ from abc import ABC, abstractmethod from pathlib import Path from typing import Self -from itertools import pairwise import numpy as np from scipy import stats import astropy.units as u @@ -56,7 +55,7 @@ def __init__(self, path: Path, season: EventType): ) # Create empty array for rv_histograms storing the energy resolution # for each bin of true energy and true declination - self.reco_energy_hists = np.empty( + self.recoE_hists = np.empty( (self.tE_bin_edges.size - 1, self.sin_dec_bin_edges.size - 1), dtype=stats.rv_histogram, ) @@ -69,11 +68,11 @@ def create_eres_at_dec(self, dec: u.rad) -> None: :type dec: u.rad """ dec_idx = np.digitize(dec.to_value(u.deg), self.dec_bin_edges) - 1 - for c_tE, (true_E_low, true_E_high) in enumerate(pairwise(self.tE_bin_edges)): + for c_tE in range(self.tE_bin_edges.size - 1): frac_counts, bins = self.marginalise_over_angles(c_tE, dec_idx) # Set density=False because smearing matrix provides unnormalised fractional counts hist = stats.rv_histogram((frac_counts, bins), density=False) - self.reco_energy_hists[c_tE, dec_idx] = hist + self.recoE_hists[c_tE, dec_idx] = hist self.recoE_bin_edges[c_tE, dec_idx] = bins @classmethod From 85c692eb82be7209b46f84fa3fe68d49dfe57ee9 Mon Sep 17 00:00:00 2001 From: Julian Kuhlmann Date: Wed, 27 May 2026 13:59:38 +0200 Subject: [PATCH 05/39] add units --- icecube_data_reader/irf/irf.py | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/icecube_data_reader/irf/irf.py b/icecube_data_reader/irf/irf.py index c7d4a4a..8f85d89 100644 --- a/icecube_data_reader/irf/irf.py +++ b/icecube_data_reader/irf/irf.py @@ -41,22 +41,27 @@ def __init__(self, path: Path, season: EventType): self.season = season # Extract true energy bins and declination bins, fixed for all Ereco distributions - self.tE_bin_edges = np.sort(np.array(list(set(self.data[:, 0:2].flatten())))) - self.dec_bin_edges = np.sort(np.array(list(set(self.data[:, 2:4].flatten())))) - self.sin_dec_bin_edges = np.sin(self.dec_bin_edges) + self.log_tE_bin_edges = np.sort( + np.array(list(set(self.data[:, 0:2].flatten()))) + ) + self.tE_bin_edges = np.power(10, self.log_tE_bin_edges) << u.GeV + self.dec_bin_edges = ( + np.sort(np.array(list(set(self.data[:, 2:4].flatten())))) << u.deg + ) + self.sin_dec_bin_edges = np.sin(self.dec_bin_edges.to_value(u.rad)) - logging.debug(f"True energy bin edges: {self.tE_bin_edges}") + logging.debug(f"True energy bin edges: {self.log_tE_bin_edges}") logging.debug(f"Dec bin edges: {self.dec_bin_edges}") # Break naming convention because r and t are too close on the keyboard self.recoE_bin_edges = np.empty( - (self.tE_bin_edges.size - 1, self.sin_dec_bin_edges.size - 1), + (self.log_tE_bin_edges.size - 1, self.sin_dec_bin_edges.size - 1), dtype=np.ndarray, ) # Create empty array for rv_histograms storing the energy resolution # for each bin of true energy and true declination self.recoE_hists = np.empty( - (self.tE_bin_edges.size - 1, self.sin_dec_bin_edges.size - 1), + (self.log_tE_bin_edges.size - 1, self.sin_dec_bin_edges.size - 1), dtype=stats.rv_histogram, ) @@ -67,8 +72,8 @@ def create_eres_at_dec(self, dec: u.rad) -> None: :param dec: Declination :type dec: u.rad """ - dec_idx = np.digitize(dec.to_value(u.deg), self.dec_bin_edges) - 1 - for c_tE in range(self.tE_bin_edges.size - 1): + dec_idx = np.digitize(dec, self.dec_bin_edges) - 1 + for c_tE in range(self.log_tE_bin_edges.size - 1): frac_counts, bins = self.marginalise_over_angles(c_tE, dec_idx) # Set density=False because smearing matrix provides unnormalised fractional counts hist = stats.rv_histogram((frac_counts, bins), density=False) @@ -112,8 +117,8 @@ def marginalise_over_angles( ereco_idx = 4 - data = self.data[self.data[:, 0] == self.tE_bin_edges[c_e]] - reduced_data = data[data[:, 2] == self.dec_bin_edges[c_d]] + data = self.data[self.data[:, 0] == self.log_tE_bin_edges[c_e]] + reduced_data = data[data[:, 2] == self.dec_bin_edges[c_d].to_value(u.deg)] bins = np.array( sorted( From 3c0f847133b9781350084edda6449541df57fb53 Mon Sep 17 00:00:00 2001 From: Julian Kuhlmann Date: Thu, 28 May 2026 11:24:19 +0200 Subject: [PATCH 06/39] add remove method, formatting --- icecube_data_reader/events.py | 76 +++++++++++++++++++++++++++-------- 1 file changed, 60 insertions(+), 16 deletions(-) diff --git a/icecube_data_reader/events.py b/icecube_data_reader/events.py index 77854c6..6fec6c0 100644 --- a/icecube_data_reader/events.py +++ b/icecube_data_reader/events.py @@ -11,15 +11,32 @@ from astropy.coordinates import SkyCoord from astropy import units as u from astropy.time import Time +import matplotlib.pyplot as plt +from matplotlib import colors +import matplotlib.cm as cm import h5py from time import time as thyme -from icecube_data_reader.downloader import data_directory, I3_14, available_datasets, IceCubeData -from icecube_data_reader.event_types import IC40, IC59, IC79, IC86, suffixes, EventType, Refrigerator +from icecube_data_reader.downloader import ( + data_directory, + I3_14, + available_datasets, + IceCubeData, +) +from icecube_data_reader.event_types import ( + IC40, + IC59, + IC79, + IC86, + suffixes, + EventType, + Refrigerator, +) from icecube_data_reader.lifetime import LifeTime from typing import Self import logging + logger = logging.getLogger(__name__) logger.setLevel(logging.DEBUG) @@ -52,7 +69,7 @@ def ra(self): @property def dec(self): return self._dec - + @property def unit_vector(self): return self._unit_vector @@ -60,12 +77,11 @@ def unit_vector(self): @property def event_type(self): return self._event_type - + @property def int_event_type(self): return self._int_event_type - @property def N(self): return self.event_type.size @@ -78,7 +94,7 @@ def apply_energy_cut(self, Emin: u.GeV, Emax: u.GeV = np.inf * u.GeV): :type Emin: u.GeV :param Emax: Maximum energy, defaults to np.inf*u.GeV :type Emax: u.GeV, optional - """ + """ pass @classmethod @@ -90,6 +106,10 @@ def from_file(): def to_file(): pass + @abstractmethod + def remove(self, i) -> None: + pass + class IceTrackDR2Events(Events): """ @@ -139,7 +159,7 @@ def scramble_ra(self, seed: int = 42) -> None: def scramble_mjd(self, lifetime: LifeTime, seed: int = 42) -> None: pass - def select(self, mask: npt.NDArray[np.bool_]): + def select(self, mask: npt.NDArray[np.bool_]) -> None: """ Select some subset of existing events by providing a mask. :param mask: Array of bools with same length as event properties. @@ -163,13 +183,26 @@ def select(self, mask: npt.NDArray[np.bool_]): self._ang_err = self._ang_err[mask] self._mjd = self._mjd[mask] + def remove(self, i: int) -> None: + """ + Remove the event at index i + :param i: Event index + :type i: int + """ + self._energy = np.delete(self._energy, i) + self._coord = np.delete(self._coord, i) + self._unit_vector = np.delete(self._unit_vector, i, axis=0) + self._event_type = np.delete(self._event_type, i) + self._ang_err = np.delete(self._ang_err, i) + self._mjd = np.delete(self._mjd, i) + def to_file( - self, - path: Path, - append: bool = False, - group_name: str | None = None, - overwrite: bool = False - ) -> Path: + self, + path: Path, + append: bool = False, + group_name: str | None = None, + overwrite: bool = False, + ) -> Path: """Write events to file. Keyworded arguments control behaviour with existing files. If not overwrite, but `path` exists, append a timestamp to the file name. @@ -241,7 +274,7 @@ def to_file( def from_file( cls, filename: Path, - group_name:str = None, + group_name: str = None, ) -> Self: """Load events from .h5 file @@ -249,7 +282,7 @@ def from_file( :type filename: Path :param group_name: Name of events group, if provided when writing to file, defaults to None :type group_name: str, optional - """ + """ with h5py.File(filename, "r") as f: if group_name is None: events_folder = f["events"] @@ -277,6 +310,18 @@ def from_file( return events + @u.quantity_input + def apply_energy_cut(self, Emin: u.GeV, Emax=np.inf * u.GeV) -> None: + """Apply energy cuts to events + + :param Emin: Minimum allowed energy + :type Emin: u.GeV + :param Emax: Maximum allowed energy, defaults to np.inf*u.GeV + :type Emax: u.GeV, optional + """ + mask = (self.energy >= Emin) & (self.energy <= Emax) + self.select(mask) + @classmethod def from_event_files(cls, *seasons: EventType) -> Self: """ @@ -284,7 +329,6 @@ def from_event_files(cls, *seasons: EventType) -> Self: If none are provided, use all. :param seasons: Seasons to load - :returns: Event container :py:class:`icecube_data_reader.events.Events` """ From d2f4226628687b87de49f7be7cc01c2c6e4c3063 Mon Sep 17 00:00:00 2001 From: Julian Kuhlmann Date: Thu, 28 May 2026 11:24:31 +0200 Subject: [PATCH 07/39] first draft for complete IRF, very slow --- icecube_data_reader/irf/irf.py | 199 ++++++++++++++++++++++++++++++--- 1 file changed, 184 insertions(+), 15 deletions(-) diff --git a/icecube_data_reader/irf/irf.py b/icecube_data_reader/irf/irf.py index 8f85d89..93aafdb 100644 --- a/icecube_data_reader/irf/irf.py +++ b/icecube_data_reader/irf/irf.py @@ -18,16 +18,24 @@ logger.setLevel(logging.DEBUG) -class EnergyResolution(ABC): - pass - +class InstrumentResponseFunction(ABC): @classmethod @abstractmethod def load(cls): pass -class IceTrackDR2EnergyResolution(EnergyResolution): +class EnergyResolution(ABC): + pass + + +class AngularResolution(ABC): + pass + + +class IceTrackDR2InstrumentResponseFunction( + InstrumentResponseFunction, EnergyResolution, AngularResolution +): def __init__(self, path: Path, season: EventType): """ @@ -44,11 +52,17 @@ def __init__(self, path: Path, season: EventType): self.log_tE_bin_edges = np.sort( np.array(list(set(self.data[:, 0:2].flatten()))) ) + self.log_tE_bin_centers = ( + self.log_tE_bin_edges[:-1] + np.diff(self.log_tE_bin_edges) / 2 + ) self.tE_bin_edges = np.power(10, self.log_tE_bin_edges) << u.GeV self.dec_bin_edges = ( np.sort(np.array(list(set(self.data[:, 2:4].flatten())))) << u.deg ) self.sin_dec_bin_edges = np.sin(self.dec_bin_edges.to_value(u.rad)) + self.sin_dec_bin_centers = ( + self.sin_dec_bin_edges[:-1] + np.diff(self.sin_dec_bin_edges) / 2 + ) logging.debug(f"True energy bin edges: {self.log_tE_bin_edges}") logging.debug(f"Dec bin edges: {self.dec_bin_edges}") @@ -64,6 +78,54 @@ def __init__(self, path: Path, season: EventType): (self.log_tE_bin_edges.size - 1, self.sin_dec_bin_edges.size - 1), dtype=stats.rv_histogram, ) + # Use lists here for possibly different numbers of bins for each ereco/psf/angerr histogram + self.psf_hists = [ + [[] for _ in self.sin_dec_bin_centers] for _ in self.log_tE_bin_centers + ] + self.ang_err_hists = [ + [[] for _ in self.sin_dec_bin_centers] for _ in self.log_tE_bin_centers + ] + + @u.quantity_input + def create_ang_res_at_dec(self, dec: u.rad) -> None: + """Create angular resolution histograms at provided declination + + :param dec: Declination + :type dec: u.rad + """ + + dec_idx = np.digitize(dec, self.dec_bin_edges) - 1 + for c_tE in range(self.log_tE_bin_centers.size): + if not isinstance(self.recoE_hists[c_tE, dec_idx], stats.rv_histogram): + frac_counts, bins, data = self._create_recoE_distribution( + c_tE, dec_idx, return_data=True + ) + self.recoE_hists[c_tE, dec_idx] = stats.rv_histogram( + (frac_counts, bins), density=False + ) + self.recoE_bin_edges[c_tE, dec_idx] = bins + else: + data = None + bins = self.recoE_bin_edges[c_tE, dec_idx] + for c_rE in range(bins.size - 1): + self.psf_hists[c_tE][dec_idx].append([]) + self.ang_err_hists[c_tE][dec_idx].append([]) + self.create_angular_distributions(c_tE, dec_idx, c_rE, data=data) + pass + + # TODO needs to call create_eres_at_dec to ensure eres sampling is possible + # due to chained histograms + pass + + @u.quantity_input + def create_IRF_at_dec(self, dec: u.rad) -> None: + """Create entire chain of IRF distributions at provided declination. + + :param dec: Declination + :type dec: u.rad + """ + + self.create_ang_res_at_dec(dec) @u.quantity_input def create_eres_at_dec(self, dec: u.rad) -> None: @@ -74,11 +136,9 @@ def create_eres_at_dec(self, dec: u.rad) -> None: """ dec_idx = np.digitize(dec, self.dec_bin_edges) - 1 for c_tE in range(self.log_tE_bin_edges.size - 1): - frac_counts, bins = self.marginalise_over_angles(c_tE, dec_idx) - # Set density=False because smearing matrix provides unnormalised fractional counts - hist = stats.rv_histogram((frac_counts, bins), density=False) - self.recoE_hists[c_tE, dec_idx] = hist - self.recoE_bin_edges[c_tE, dec_idx] = bins + if isinstance(self.recoE_hists[c_tE, dec_idx], stats.rv_histogram): + continue + self._create_recoE_distribution(c_tE, dec_idx) @classmethod def load(cls, season: EventType) -> Self: @@ -100,26 +160,31 @@ def load(cls, season: EventType) -> Self: return cls(path, season) - def marginalise_over_angles( - self, c_e: int, c_d: int - ) -> tuple[np.ndarray, np.ndarray]: + def _create_recoE_distribution( + self, c_e: int, c_d: int, return_data: bool = False + ) -> tuple[np.ndarray, ...]: """Creates the reconstructed energy distribution for given true energy and declination by marginalising over the kinematic (PSF) angle and angular error. :param c_e: Index of true energy bin :type c_e: int - :param c_d: Index of true declination (conversly sin(declination) bin + :param c_d: Index of true declination (conversely sin(declination)) bin :type c_d: int - :return: Tuple of fractional counts per bin and bin edges + :param return_data: If true return the relevant entries of the smearing matrix, defaults to False + :type return_data: bool, optional + :return: Tuple of fractional counts per bin and bin edges, optional relevant entries + of smearing matrix :rtype: tuple[np.ndarray, np.ndarray] """ ereco_idx = 4 + # Get entries at relevant true energy and declination data = self.data[self.data[:, 0] == self.log_tE_bin_edges[c_e]] reduced_data = data[data[:, 2] == self.dec_bin_edges[c_d].to_value(u.deg)] + # Create bin edges of reco energy bins = np.array( sorted( list( @@ -130,11 +195,115 @@ def marginalise_over_angles( ) ) - frac_counts = np.zeros(bins.shape[0] - 1) + frac_counts = np.zeros(bins.size - 1) # marginalise over angular quantities for c_b, b in enumerate(bins[:-1]): indices = np.nonzero(np.isclose(b, reduced_data[:, ereco_idx])) frac_counts[c_b] = np.sum(reduced_data[indices, -1]) + self.recoE_hists[c_e, c_d] = stats.rv_histogram( + (frac_counts, bins), density=False + ) + self.recoE_bin_edges[c_e, c_d] = bins + + if return_data: + return frac_counts, bins, reduced_data return frac_counts, bins + + def create_angular_distributions( + self, + c_e: int, + c_d: int, + c_rE: int, + data: None | np.ndarray = None, + return_data: bool = False, + ) -> tuple[np.ndarray, ...]: + """Creates PSF distribution for provided indices of preceeding histograms + by marginalising over the angular error. + + :param c_e: Index of true energy bin + :type c_e: int + :param c_d: Index of true declination (conversely sin(declination)) bin + :type c_d: int + :param c_rE: Index of reconstructed energy bin + :type c_rE: int + :param data: Relevant entries (i.e. for true energy, declination) + of the smearing matrix, defaults to None + :type data: None | np.ndarray, optional + :return: Tuple of fractional counts ber bin and bin edges, optional relevant data + of smearing matrix + :rtype: tuple[np.ndarray, ...] + """ + + psf_idx = 6 + + if data is None: + # Get entries at relevant true energy and declination + reduced_data = self.data[self.data[:, 0] == self.log_tE_bin_edges[c_e]] + reduced_data = reduced_data[ + reduced_data[:, 2] == self.dec_bin_edges[c_d].to_value(u.deg) + ] + + else: + reduced_data = data + + # Get entries at relevant reco energy + reduced_data = reduced_data[ + reduced_data[:, 4] == self.recoE_bin_edges[c_e, c_d][c_rE] + ] + + # Create bin edges of kinematic angle / PSF + bins = np.array( + sorted( + list( + set(reduced_data[:, psf_idx]).union( + set(reduced_data[:, psf_idx + 1]) + ) + ) + ) + ) + + frac_counts = np.zeros(bins.size - 1) + self.psf_hists[c_e][c_d][c_rE] = [] + self.ang_err_hists[c_e][c_d][c_rE] = [] + for c_b, b in enumerate(bins[:-1]): + indices = np.nonzero(np.isclose(b, reduced_data[:, psf_idx])) + frac_counts[c_b] = np.sum(reduced_data[indices, -1]) + hist = stats.rv_histogram((frac_counts, bins), density=False) + self.psf_hists[c_e][c_d][c_rE].append(hist) + psf_reduced_data = reduced_data[reduced_data[:, psf_idx] == b] + self.ang_err_hists[c_e][c_d][c_rE].append([]) + self._create_ang_err_distribution(c_e, c_d, c_rE, c_b, psf_reduced_data) + + if return_data: + return frac_counts, bins, reduced_data + return frac_counts, bins + + def _create_ang_err_distribution( + self, + c_e: int, + c_d: int, + c_rE: int, + c_psf: int, + reduced_data: np.ndarray, + ) -> tuple[np.ndarray, ...]: + + ang_err_idx = 8 + # Reduced data by etrue, dec, ereco, psf + bins = np.array( + sorted( + list( + set(reduced_data[:, ang_err_idx]).union( + set(reduced_data[:, ang_err_idx + 1]) + ) + ) + ) + ) + + frac_counts = np.zeros(bins.size - 1) + for c_b, b in enumerate(bins[:-1]): + indices = np.nonzero(np.isclose(b, reduced_data[:, ang_err_idx])) + frac_counts[c_b] = np.sum(reduced_data[indices, -1]) + hist = stats.rv_histogram((frac_counts, bins), density=False) + self.ang_err_hists[c_e][c_d][c_rE][c_psf].append(hist) From 496b410f1042f2995827d59299bd3f7d2ab2a7e4 Mon Sep 17 00:00:00 2001 From: Julian Kuhlmann Date: Thu, 28 May 2026 14:57:16 +0200 Subject: [PATCH 08/39] slight speedup --- icecube_data_reader/irf/irf.py | 44 +++++++++++++++++++++++++++------- 1 file changed, 36 insertions(+), 8 deletions(-) diff --git a/icecube_data_reader/irf/irf.py b/icecube_data_reader/irf/irf.py index 93aafdb..d40d946 100644 --- a/icecube_data_reader/irf/irf.py +++ b/icecube_data_reader/irf/irf.py @@ -18,6 +18,9 @@ logger.setLevel(logging.DEBUG) +from line_profiler import profile + + class InstrumentResponseFunction(ABC): @classmethod @abstractmethod @@ -87,6 +90,7 @@ def __init__(self, path: Path, season: EventType): ] @u.quantity_input + @profile def create_ang_res_at_dec(self, dec: u.rad) -> None: """Create angular resolution histograms at provided declination @@ -96,21 +100,23 @@ def create_ang_res_at_dec(self, dec: u.rad) -> None: dec_idx = np.digitize(dec, self.dec_bin_edges) - 1 for c_tE in range(self.log_tE_bin_centers.size): + data = self.data[self.data[:, 0] == self.log_tE_bin_edges[c_tE]] + data = data[data[:, 2] == self.dec_bin_edges[dec_idx]] if not isinstance(self.recoE_hists[c_tE, dec_idx], stats.rv_histogram): - frac_counts, bins, data = self._create_recoE_distribution( - c_tE, dec_idx, return_data=True + frac_counts, bins, ereco_data = self._create_recoE_distribution( + c_tE, dec_idx, data=data, return_data=True ) self.recoE_hists[c_tE, dec_idx] = stats.rv_histogram( (frac_counts, bins), density=False ) self.recoE_bin_edges[c_tE, dec_idx] = bins else: - data = None + ereco_data = None bins = self.recoE_bin_edges[c_tE, dec_idx] for c_rE in range(bins.size - 1): self.psf_hists[c_tE][dec_idx].append([]) self.ang_err_hists[c_tE][dec_idx].append([]) - self.create_angular_distributions(c_tE, dec_idx, c_rE, data=data) + self.create_angular_distributions(c_tE, dec_idx, c_rE, data=ereco_data) pass # TODO needs to call create_eres_at_dec to ensure eres sampling is possible @@ -118,6 +124,7 @@ def create_ang_res_at_dec(self, dec: u.rad) -> None: pass @u.quantity_input + @profile def create_IRF_at_dec(self, dec: u.rad) -> None: """Create entire chain of IRF distributions at provided declination. @@ -128,6 +135,7 @@ def create_IRF_at_dec(self, dec: u.rad) -> None: self.create_ang_res_at_dec(dec) @u.quantity_input + @profile def create_eres_at_dec(self, dec: u.rad) -> None: """Create energy resolution histograms at provided declination @@ -160,8 +168,13 @@ def load(cls, season: EventType) -> Self: return cls(path, season) + @profile def _create_recoE_distribution( - self, c_e: int, c_d: int, return_data: bool = False + self, + c_e: int, + c_d: int, + data: None | np.ndarray = None, + return_data: bool = False, ) -> tuple[np.ndarray, ...]: """Creates the reconstructed energy distribution for given true energy and declination by marginalising over the kinematic (PSF) angle @@ -171,6 +184,9 @@ def _create_recoE_distribution( :type c_e: int :param c_d: Index of true declination (conversely sin(declination)) bin :type c_d: int + :param data: Relevant entries (i.e. for true energy, declination) + of the smearing matrix, defaults to None + :type data: None | np.ndarray, optional :param return_data: If true return the relevant entries of the smearing matrix, defaults to False :type return_data: bool, optional :return: Tuple of fractional counts per bin and bin edges, optional relevant entries @@ -180,9 +196,12 @@ def _create_recoE_distribution( ereco_idx = 4 - # Get entries at relevant true energy and declination - data = self.data[self.data[:, 0] == self.log_tE_bin_edges[c_e]] - reduced_data = data[data[:, 2] == self.dec_bin_edges[c_d].to_value(u.deg)] + if data is None: + # Get entries at relevant true energy and declination + data = self.data[self.data[:, 2] == self.dec_bin_edges[c_d].to_value(u.deg)] + reduced_data = data[data[:, 0] == self.log_tE_bin_edges[c_e]] + else: + reduced_data = data # Create bin edges of reco energy bins = np.array( @@ -211,6 +230,7 @@ def _create_recoE_distribution( return frac_counts, bins, reduced_data return frac_counts, bins + @profile def create_angular_distributions( self, c_e: int, @@ -280,6 +300,7 @@ def create_angular_distributions( return frac_counts, bins, reduced_data return frac_counts, bins + @profile def _create_ang_err_distribution( self, c_e: int, @@ -307,3 +328,10 @@ def _create_ang_err_distribution( frac_counts[c_b] = np.sum(reduced_data[indices, -1]) hist = stats.rv_histogram((frac_counts, bins), density=False) self.ang_err_hists[c_e][c_d][c_rE][c_psf].append(hist) + + +if __name__ == "__main__": + from icecube_data_reader.event_types import IC86 + + irf = IceTrackDR2InstrumentResponseFunction.load(IC86) + irf.create_IRF_at_dec(0.0 * u.deg) From add71b24525598a19bfe623676849e7baf5f432a Mon Sep 17 00:00:00 2001 From: Julian Kuhlmann Date: Thu, 28 May 2026 16:45:01 +0200 Subject: [PATCH 09/39] endless bugfixes --- icecube_data_reader/irf/irf.py | 268 +++++++++++++++------------------ 1 file changed, 121 insertions(+), 147 deletions(-) diff --git a/icecube_data_reader/irf/irf.py b/icecube_data_reader/irf/irf.py index d40d946..315f9e9 100644 --- a/icecube_data_reader/irf/irf.py +++ b/icecube_data_reader/irf/irf.py @@ -40,7 +40,7 @@ class IceTrackDR2InstrumentResponseFunction( InstrumentResponseFunction, EnergyResolution, AngularResolution ): - def __init__(self, path: Path, season: EventType): + def __init__(self, data: np.ndarray, season: EventType): """ :param path: Path to smearing matrix :type path: Path @@ -48,112 +48,81 @@ def __init__(self, path: Path, season: EventType): :type season: EventType """ - self.data = np.loadtxt(path) + self.data = data self.season = season - # Extract true energy bins and declination bins, fixed for all Ereco distributions - self.log_tE_bin_edges = np.sort( - np.array(list(set(self.data[:, 0:2].flatten()))) - ) - self.log_tE_bin_centers = ( - self.log_tE_bin_edges[:-1] + np.diff(self.log_tE_bin_edges) / 2 - ) - self.tE_bin_edges = np.power(10, self.log_tE_bin_edges) << u.GeV - self.dec_bin_edges = ( - np.sort(np.array(list(set(self.data[:, 2:4].flatten())))) << u.deg - ) - self.sin_dec_bin_edges = np.sin(self.dec_bin_edges.to_value(u.rad)) - self.sin_dec_bin_centers = ( - self.sin_dec_bin_edges[:-1] + np.diff(self.sin_dec_bin_edges) / 2 - ) - - logging.debug(f"True energy bin edges: {self.log_tE_bin_edges}") - logging.debug(f"Dec bin edges: {self.dec_bin_edges}") + self.etrue_idx = 0 + self.ereco_idx = 2 + self.psf_idx = 4 + self.ang_err_idx = 6 + def _post_init(self): # Break naming convention because r and t are too close on the keyboard - self.recoE_bin_edges = np.empty( - (self.log_tE_bin_edges.size - 1, self.sin_dec_bin_edges.size - 1), - dtype=np.ndarray, - ) + self.recoE_bin_edges = [None] * self.log_tE_bin_centers.size # Create empty array for rv_histograms storing the energy resolution # for each bin of true energy and true declination - self.recoE_hists = np.empty( - (self.log_tE_bin_edges.size - 1, self.sin_dec_bin_edges.size - 1), - dtype=stats.rv_histogram, - ) + self.recoE_hists = [None] * self.log_tE_bin_centers.size # Use lists here for possibly different numbers of bins for each ereco/psf/angerr histogram - self.psf_hists = [ - [[] for _ in self.sin_dec_bin_centers] for _ in self.log_tE_bin_centers - ] - self.ang_err_hists = [ - [[] for _ in self.sin_dec_bin_centers] for _ in self.log_tE_bin_centers - ] + self.psf_hists = [[] for _ in self.log_tE_bin_centers] + self.psf_bin_edges = [[] for _ in self.log_tE_bin_centers] + self.psf_sampling = [[] for _ in self.log_tE_bin_centers] + self.ang_err_hists = [[] for _ in self.log_tE_bin_centers] + self.ang_err_bin_edges = [[] for _ in self.log_tE_bin_centers] + self.ang_err_sampling = [[] for _ in self.log_tE_bin_centers] @u.quantity_input @profile - def create_ang_res_at_dec(self, dec: u.rad) -> None: - """Create angular resolution histograms at provided declination + def create_IRF(self) -> None: + """Create entire chain of IRF distributions at provided declination.""" - :param dec: Declination - :type dec: u.rad - """ + self.create_ang_res() + + @u.quantity_input + @profile + def create_eres(self) -> None: + """Create energy resolution histograms""" + + for c_tE in range(self.log_tE_bin_edges.size - 1): + if isinstance(self.recoE_hists[c_tE], stats.rv_histogram): + continue + self._create_recoE_distribution(c_tE) + + @u.quantity_input + @profile + def create_ang_res(self) -> None: + """Create angular resolution histograms""" - dec_idx = np.digitize(dec, self.dec_bin_edges) - 1 for c_tE in range(self.log_tE_bin_centers.size): - data = self.data[self.data[:, 0] == self.log_tE_bin_edges[c_tE]] - data = data[data[:, 2] == self.dec_bin_edges[dec_idx]] - if not isinstance(self.recoE_hists[c_tE, dec_idx], stats.rv_histogram): + if not isinstance(self.recoE_hists[c_tE], stats.rv_histogram): frac_counts, bins, ereco_data = self._create_recoE_distribution( - c_tE, dec_idx, data=data, return_data=True + c_tE, return_data=True ) - self.recoE_hists[c_tE, dec_idx] = stats.rv_histogram( + self.recoE_hists[c_tE] = stats.rv_histogram( (frac_counts, bins), density=False ) - self.recoE_bin_edges[c_tE, dec_idx] = bins + self.recoE_bin_edges[c_tE] = bins else: ereco_data = None - bins = self.recoE_bin_edges[c_tE, dec_idx] + bins = self.recoE_bin_edges[c_tE] + # bins is reco energy, each bin is mapped to a PSF distribution + self.psf_hists[c_tE] = [None] * (bins.size - 1) + self.psf_bin_edges[c_tE] = [None] * (bins.size - 1) + self.psf_sampling[c_tE] = [None] * (bins.size - 1) + + self.ang_err_hists[c_tE] = [None] * (bins.size - 1) + self.ang_err_bin_edges[c_tE] = [None] * (bins.size - 1) + self.ang_err_sampling[c_tE] = [None] * (bins.size - 1) for c_rE in range(bins.size - 1): - self.psf_hists[c_tE][dec_idx].append([]) - self.ang_err_hists[c_tE][dec_idx].append([]) - self.create_angular_distributions(c_tE, dec_idx, c_rE, data=ereco_data) - pass - - # TODO needs to call create_eres_at_dec to ensure eres sampling is possible - # due to chained histograms - pass - - @u.quantity_input - @profile - def create_IRF_at_dec(self, dec: u.rad) -> None: - """Create entire chain of IRF distributions at provided declination. - - :param dec: Declination - :type dec: u.rad - """ - - self.create_ang_res_at_dec(dec) - - @u.quantity_input - @profile - def create_eres_at_dec(self, dec: u.rad) -> None: - """Create energy resolution histograms at provided declination - - :param dec: Declination - :type dec: u.rad - """ - dec_idx = np.digitize(dec, self.dec_bin_edges) - 1 - for c_tE in range(self.log_tE_bin_edges.size - 1): - if isinstance(self.recoE_hists[c_tE, dec_idx], stats.rv_histogram): - continue - self._create_recoE_distribution(c_tE, dec_idx) + self.create_angular_distributions(c_tE, c_rE, data=ereco_data) @classmethod - def load(cls, season: EventType) -> Self: + def load(cls, season: EventType, dec: u.deg) -> Self: """Create energy resolution object for provided season :param season: Season :type season: EventType + :param dec: Declination + :type dec: u.deg :return: Energy resolution :rtype: :py:class:`IceTrackDR2EnergyResolution` """ @@ -166,14 +135,35 @@ def load(cls, season: EventType) -> Self: / Path(f"{str(season)}_smearing.csv") ) - return cls(path, season) + data = np.loadtxt(path) + season = season + + # Extract true energy bins and declination bins, fixed for all Ereco distributions + log_tE_bin_edges = np.sort(np.unique(data[:, 0:2].flatten())) + log_tE_bin_centers = log_tE_bin_edges[:-1] + np.diff(log_tE_bin_edges) / 2 + tE_bin_edges = np.power(10, log_tE_bin_edges) << u.GeV + dec_bin_edges = np.sort(np.unique(data[:, 2:4].flatten())) + + dec_idx = np.digitize(dec.to_value(u.deg), dec_bin_edges) - 1 + data = data[data[:, 2] == dec_bin_edges[dec_idx]] + # Remove declination bc we only use one declination bin + mask = [True, True, False, False, True, True, True, True, True, True, True] + data = data[:, mask] + irf = cls(data, season) + irf.log_tE_bin_edges = log_tE_bin_edges + irf.log_tE_bin_centers = log_tE_bin_centers + irf.tE_bin_edges = tE_bin_edges + irf.dec_bin_edges = dec_bin_edges + irf.dec_idx = dec_idx + + irf._post_init() + + return irf @profile def _create_recoE_distribution( self, c_e: int, - c_d: int, - data: None | np.ndarray = None, return_data: bool = False, ) -> tuple[np.ndarray, ...]: """Creates the reconstructed energy distribution for given true @@ -182,49 +172,33 @@ def _create_recoE_distribution( :param c_e: Index of true energy bin :type c_e: int - :param c_d: Index of true declination (conversely sin(declination)) bin - :type c_d: int :param data: Relevant entries (i.e. for true energy, declination) of the smearing matrix, defaults to None - :type data: None | np.ndarray, optional - :param return_data: If true return the relevant entries of the smearing matrix, defaults to False :type return_data: bool, optional :return: Tuple of fractional counts per bin and bin edges, optional relevant entries of smearing matrix :rtype: tuple[np.ndarray, np.ndarray] """ - ereco_idx = 4 - - if data is None: - # Get entries at relevant true energy and declination - data = self.data[self.data[:, 2] == self.dec_bin_edges[c_d].to_value(u.deg)] - reduced_data = data[data[:, 0] == self.log_tE_bin_edges[c_e]] - else: - reduced_data = data + # Get entries at relevant true energy and declination + reduced_data = self.data[ + self.data[:, self.etrue_idx] == self.log_tE_bin_edges[c_e] + ] # Create bin edges of reco energy - bins = np.array( - sorted( - list( - set(reduced_data[:, ereco_idx]).union( - set(reduced_data[:, ereco_idx + 1]) - ) - ) - ) + bins = np.sort( + np.unique(reduced_data[:, self.ereco_idx : self.ereco_idx + 2].flatten()) ) frac_counts = np.zeros(bins.size - 1) # marginalise over angular quantities for c_b, b in enumerate(bins[:-1]): - indices = np.nonzero(np.isclose(b, reduced_data[:, ereco_idx])) + indices = np.nonzero(np.isclose(b, reduced_data[:, self.ereco_idx])) frac_counts[c_b] = np.sum(reduced_data[indices, -1]) - self.recoE_hists[c_e, c_d] = stats.rv_histogram( - (frac_counts, bins), density=False - ) - self.recoE_bin_edges[c_e, c_d] = bins + self.recoE_hists[c_e] = stats.rv_histogram((frac_counts, bins), density=False) + self.recoE_bin_edges[c_e] = bins if return_data: return frac_counts, bins, reduced_data @@ -234,7 +208,6 @@ def _create_recoE_distribution( def create_angular_distributions( self, c_e: int, - c_d: int, c_rE: int, data: None | np.ndarray = None, return_data: bool = False, @@ -244,8 +217,6 @@ def create_angular_distributions( :param c_e: Index of true energy bin :type c_e: int - :param c_d: Index of true declination (conversely sin(declination)) bin - :type c_d: int :param c_rE: Index of reconstructed energy bin :type c_rE: int :param data: Relevant entries (i.e. for true energy, declination) @@ -256,46 +227,39 @@ def create_angular_distributions( :rtype: tuple[np.ndarray, ...] """ - psf_idx = 6 - if data is None: # Get entries at relevant true energy and declination reduced_data = self.data[self.data[:, 0] == self.log_tE_bin_edges[c_e]] - reduced_data = reduced_data[ - reduced_data[:, 2] == self.dec_bin_edges[c_d].to_value(u.deg) - ] else: reduced_data = data # Get entries at relevant reco energy reduced_data = reduced_data[ - reduced_data[:, 4] == self.recoE_bin_edges[c_e, c_d][c_rE] + reduced_data[:, 2] == self.recoE_bin_edges[c_e][c_rE] ] # Create bin edges of kinematic angle / PSF - bins = np.array( - sorted( - list( - set(reduced_data[:, psf_idx]).union( - set(reduced_data[:, psf_idx + 1]) - ) - ) - ) + bins = np.sort( + np.unique(reduced_data[:, self.psf_idx : self.psf_idx + 2].flatten()) ) + self.psf_bin_edges[c_e][c_rE] = bins + self.ang_err_sampling[c_e][c_rE] = [None] * (bins.size - 1) + self.ang_err_hists[c_e][c_rE] = [None] * (bins.size - 1) + self.ang_err_bin_edges[c_e][c_rE] = [None] * (bins.size - 1) + frac_counts = np.zeros(bins.size - 1) - self.psf_hists[c_e][c_d][c_rE] = [] - self.ang_err_hists[c_e][c_d][c_rE] = [] + # Calculate fractional count in each PSF bin by marginalising over ang_err for c_b, b in enumerate(bins[:-1]): - indices = np.nonzero(np.isclose(b, reduced_data[:, psf_idx])) + indices = np.nonzero(np.isclose(b, reduced_data[:, self.psf_idx])) frac_counts[c_b] = np.sum(reduced_data[indices, -1]) - hist = stats.rv_histogram((frac_counts, bins), density=False) - self.psf_hists[c_e][c_d][c_rE].append(hist) - psf_reduced_data = reduced_data[reduced_data[:, psf_idx] == b] - self.ang_err_hists[c_e][c_d][c_rE].append([]) - self._create_ang_err_distribution(c_e, c_d, c_rE, c_b, psf_reduced_data) + psf_reduced_data = reduced_data[reduced_data[:, self.psf_idx] == b] + self._create_ang_err_distribution(c_e, c_rE, c_b, psf_reduced_data) + hist = stats.rv_histogram((frac_counts, bins), density=False) + self.psf_sampling[c_e][c_rE] = hist + self.psf_hists[c_e][c_rE] = frac_counts / frac_counts.sum() if return_data: return frac_counts, bins, reduced_data return frac_counts, bins @@ -304,34 +268,44 @@ def create_angular_distributions( def _create_ang_err_distribution( self, c_e: int, - c_d: int, c_rE: int, c_psf: int, reduced_data: np.ndarray, ) -> tuple[np.ndarray, ...]: + """Create angular error distribution for provided true energy, + reco energy and kinematic angle indices. + + :param c_e: Index of true energy + :type c_e: int + :param c_rE: Index of reconstructed energy + :type c_rE: int + :param c_psf: Index of kinematic angle/PSF + :type c_psf: int + :param reduced_data: Relevant entries of the smearing matrix + :type reduced_data: np.ndarray + :return: _description_ + :rtype: tuple[np.ndarray, ...] + """ - ang_err_idx = 8 # Reduced data by etrue, dec, ereco, psf - bins = np.array( - sorted( - list( - set(reduced_data[:, ang_err_idx]).union( - set(reduced_data[:, ang_err_idx + 1]) - ) - ) + bins = np.sort( + np.unique( + reduced_data[:, self.ang_err_idx : self.ang_err_idx + 2].flatten() ) ) + self.ang_err_bin_edges[c_e][c_rE][c_psf] = bins frac_counts = np.zeros(bins.size - 1) for c_b, b in enumerate(bins[:-1]): - indices = np.nonzero(np.isclose(b, reduced_data[:, ang_err_idx])) + indices = np.nonzero(np.isclose(b, reduced_data[:, self.ang_err_idx])) frac_counts[c_b] = np.sum(reduced_data[indices, -1]) - hist = stats.rv_histogram((frac_counts, bins), density=False) - self.ang_err_hists[c_e][c_d][c_rE][c_psf].append(hist) + hist = stats.rv_histogram((frac_counts, bins), density=False) + self.ang_err_sampling[c_e][c_rE][c_psf] = hist + self.ang_err_hists[c_e][c_rE][c_psf] = frac_counts / frac_counts.sum() if __name__ == "__main__": from icecube_data_reader.event_types import IC86 irf = IceTrackDR2InstrumentResponseFunction.load(IC86) - irf.create_IRF_at_dec(0.0 * u.deg) + irf.create_IRF() From b48c4a2ff22696cb93b4ef43498e63cbbbb233fc Mon Sep 17 00:00:00 2001 From: Julian Kuhlmann Date: Thu, 28 May 2026 16:51:41 +0200 Subject: [PATCH 10/39] rename all fields to plural --- icecube_data_reader/events.py | 163 +++++++++++++++++++--------------- 1 file changed, 89 insertions(+), 74 deletions(-) diff --git a/icecube_data_reader/events.py b/icecube_data_reader/events.py index 77854c6..e73b3d4 100644 --- a/icecube_data_reader/events.py +++ b/icecube_data_reader/events.py @@ -14,12 +14,26 @@ import h5py from time import time as thyme -from icecube_data_reader.downloader import data_directory, I3_14, available_datasets, IceCubeData -from icecube_data_reader.event_types import IC40, IC59, IC79, IC86, suffixes, EventType, Refrigerator +from icecube_data_reader.downloader import ( + data_directory, + I3_14, + available_datasets, + IceCubeData, +) +from icecube_data_reader.event_types import ( + IC40, + IC59, + IC79, + IC86, + suffixes, + EventType, + Refrigerator, +) from icecube_data_reader.lifetime import LifeTime from typing import Self import logging + logger = logging.getLogger(__name__) logger.setLevel(logging.DEBUG) @@ -34,16 +48,16 @@ def mjd(self): return self._mjd @property - def energy(self): - return self._energy + def energies(self): + return self._energies @property - def ang_err(self): - return self._ang_err + def ang_errs(self): + return self._ang_errs @property - def coord(self): - return self._coord + def coords(self): + return self._coords @property def ra(self): @@ -52,23 +66,22 @@ def ra(self): @property def dec(self): return self._dec - - @property - def unit_vector(self): - return self._unit_vector @property - def event_type(self): - return self._event_type - + def unit_vectors(self): + return self._unit_vectors + @property - def int_event_type(self): - return self._int_event_type + def event_types(self): + return self._event_types + @property + def int_event_types(self): + return self._int_event_types @property def N(self): - return self.event_type.size + return self.event_types.size @u.quantity_input def apply_energy_cut(self, Emin: u.GeV, Emax: u.GeV = np.inf * u.GeV): @@ -78,7 +91,7 @@ def apply_energy_cut(self, Emin: u.GeV, Emax: u.GeV = np.inf * u.GeV): :type Emin: u.GeV :param Emax: Maximum energy, defaults to np.inf*u.GeV :type Emax: u.GeV, optional - """ + """ pass @classmethod @@ -97,29 +110,31 @@ class IceTrackDR2Events(Events): """ mjd_ = 3 - energy_ = 4 - ang_err_ = 5 - ra_ = 6 - dec_ = 7 + energies_ = 4 + ang_errs_ = 5 + ras_ = 6 + decs_ = 7 @u.quantity_input def __init__( self, - energy: u.GeV, - coord: SkyCoord, - event_type: np.ndarray, - ang_err: u.deg, + energies: u.GeV, + coords: SkyCoord, + event_types: np.ndarray, + ang_errs: u.deg, mjd: Time, ): - self._energy = energy - self._coord = coord - self._event_type = event_type - self._int_event_type = np.array([_.S for _ in event_type]) - self._ang_err = ang_err + self._energies = energies + self._coords = coords + self._event_types = event_types + self._int_event_types = np.array([_.S for _ in event_types]) + self._ang_errs = ang_errs self._mjd = mjd - self._coord.representation_type = "cartesian" - self._unit_vector = np.array([coord.x.value, coord.y.value, coord.z.value]).T - self._coord.representation_type = "spherical" + self._coords.representation_type = "cartesian" + self._unit_vectors = np.array( + [coords.x.value, coords.y.value, coords.z.value] + ).T + self._coords.representation_type = "spherical" def scramble_ra(self, seed: int = 42) -> None: """ @@ -155,21 +170,21 @@ def select(self, mask: npt.NDArray[np.bool_]): except AttributeError: pass - self._energy = self._energy[mask] - self._coord = self._coord[mask] - self._unit_vector = self._unit_vector[mask] - self._event_type = self._event_type[mask] - self._int_event_type = self._int_event_type[mask] - self._ang_err = self._ang_err[mask] + self._energies = self._energies[mask] + self._coords = self._coords[mask] + self._unit_vectors = self._unit_vectors[mask] + self._event_types = self._event_types[mask] + self._int_event_types = self._int_event_types[mask] + self._ang_errs = self._ang_errs[mask] self._mjd = self._mjd[mask] def to_file( - self, - path: Path, - append: bool = False, - group_name: str | None = None, - overwrite: bool = False - ) -> Path: + self, + path: Path, + append: bool = False, + group_name: str | None = None, + overwrite: bool = False, + ) -> Path: """Write events to file. Keyworded arguments control behaviour with existing files. If not overwrite, but `path` exists, append a timestamp to the file name. @@ -187,12 +202,12 @@ def to_file( :rtype: Path """ - self._file_keys = ["energy", "unit_vector", "event_type", "ang_err", "mjd"] + self._file_keys = ["energies", "unit_vectors", "event_types", "ang_errs", "mjd"] self._file_values = [ - self.energy.to(u.GeV).value, - self.unit_vector, - self.int_event_type, - self.ang_err.to(u.deg).value, + self.energies.to(u.GeV).value, + self.unit_vectors, + self.int_event_types, + self.ang_errs.to(u.deg).value, self.mjd.mjd, ] @@ -241,7 +256,7 @@ def to_file( def from_file( cls, filename: Path, - group_name:str = None, + group_name: str = None, ) -> Self: """Load events from .h5 file @@ -249,31 +264,31 @@ def from_file( :type filename: Path :param group_name: Name of events group, if provided when writing to file, defaults to None :type group_name: str, optional - """ + """ with h5py.File(filename, "r") as f: if group_name is None: events_folder = f["events"] else: events_folder = f[group_name] - energy = events_folder["energy"][()] * u.GeV - uv = events_folder["unit_vector"][()] - int_event_type = events_folder["event_type"][()] - event_type = np.array([Refrigerator.int2dm(_) for _ in int_event_type]) - ang_err = events_folder["ang_err"][()] * u.deg + energies = events_folder["energies"][()] * u.GeV + uv = events_folder["unit_vectors"][()] + int_event_types = events_folder["event_types"][()] + event_types = np.array([Refrigerator.int2dm(_) for _ in int_event_types]) + ang_errs = events_folder["ang_errs"][()] * u.deg # For backwards compatibility try: mjd = events_folder["mjd"][()] except KeyError: - mjd = [99.0] * len(energy) + mjd = [99.0] * len(energies) - coord = SkyCoord( + coords = SkyCoord( uv.T[0], uv.T[1], uv.T[2], representation_type="cartesian", frame="icrs" ) mjd = Time(mjd, format="mjd") - coord.representation_type = "spherical" - events = cls(energy, coord, event_type, ang_err, mjd) + coords.representation_type = "spherical" + events = cls(energies, coords, event_types, ang_errs, mjd) return events @@ -310,12 +325,12 @@ def from_event_files(cls, *seasons: EventType) -> Self: data_interface = IceCubeData() data_interface.fetch(I3_14) - energy = [] + energies = [] ra = [] dec = [] mjd = [] - ang_err = [] - event_type = [] + ang_errs = [] + event_types = [] def _append_data(s, suffering: str = ""): data = np.loadtxt( @@ -325,12 +340,12 @@ def _append_data(s, suffering: str = ""): ) ) - energy.append(data[:, cls.energy_]) + energies.append(data[:, cls.energies_]) mjd.append(data[:, cls.mjd_]) ra.append(data[:, cls.ra_]) dec.append(data[:, cls.dec_]) - ang_err.append(data[:, cls.ang_err_]) - event_type.append(len(data[:, cls.energy_]) * [s]) + ang_errs.append(data[:, cls.ang_errs_]) + event_types.append(len(data[:, cls.energies_]) * [s]) for s in seasons: if s == IC86: @@ -339,15 +354,15 @@ def _append_data(s, suffering: str = ""): else: _append_data(s) - energy = np.power(10, np.concatenate(energy)) << u.GeV + energies = np.power(10, np.concatenate(energies)) << u.GeV mjd = Time(np.concatenate(mjd), format="mjd") ra = np.concatenate(ra) << u.deg dec = np.concatenate(dec) << u.deg - ang_err = np.concatenate(ang_err) << u.deg - event_type = np.concatenate(event_type) - coord = SkyCoord(ra=ra, dec=dec, frame="icrs") + ang_errs = np.concatenate(ang_errs) << u.deg + event_types = np.concatenate(event_types) + coords = SkyCoord(ra=ra, dec=dec, frame="icrs") - events = cls(energy, coord, event_type, ang_err, mjd) + events = cls(energies, coords, event_types, ang_errs, mjd) events._ra = ra events._dec = dec From 65db1c09ef32c9b7837e2f7dbbb3a0500138d208 Mon Sep 17 00:00:00 2001 From: Julian Kuhlmann Date: Thu, 28 May 2026 17:17:24 +0200 Subject: [PATCH 11/39] fix tests --- icecube_data_reader/events.py | 18 +++++++++--------- tests/test_events.py | 19 ++++++++++++------- 2 files changed, 21 insertions(+), 16 deletions(-) diff --git a/icecube_data_reader/events.py b/icecube_data_reader/events.py index e73b3d4..40f8911 100644 --- a/icecube_data_reader/events.py +++ b/icecube_data_reader/events.py @@ -326,8 +326,8 @@ def from_event_files(cls, *seasons: EventType) -> Self: data_interface.fetch(I3_14) energies = [] - ra = [] - dec = [] + ras = [] + decs = [] mjd = [] ang_errs = [] event_types = [] @@ -342,8 +342,8 @@ def _append_data(s, suffering: str = ""): energies.append(data[:, cls.energies_]) mjd.append(data[:, cls.mjd_]) - ra.append(data[:, cls.ra_]) - dec.append(data[:, cls.dec_]) + ras.append(data[:, cls.ras_]) + decs.append(data[:, cls.decs_]) ang_errs.append(data[:, cls.ang_errs_]) event_types.append(len(data[:, cls.energies_]) * [s]) @@ -356,14 +356,14 @@ def _append_data(s, suffering: str = ""): energies = np.power(10, np.concatenate(energies)) << u.GeV mjd = Time(np.concatenate(mjd), format="mjd") - ra = np.concatenate(ra) << u.deg - dec = np.concatenate(dec) << u.deg + ras = np.concatenate(ras) << u.deg + decs = np.concatenate(decs) << u.deg ang_errs = np.concatenate(ang_errs) << u.deg event_types = np.concatenate(event_types) - coords = SkyCoord(ra=ra, dec=dec, frame="icrs") + coords = SkyCoord(ra=ras, dec=decs, frame="icrs") events = cls(energies, coords, event_types, ang_errs, mjd) - events._ra = ra - events._dec = dec + events._ras = ras + events._decs = decs return events diff --git a/tests/test_events.py b/tests/test_events.py index 5d3c607..0ed7bfe 100644 --- a/tests/test_events.py +++ b/tests/test_events.py @@ -12,12 +12,14 @@ def test_event_number(): events = IceTrackDR2Events.from_event_files() assert events.N == 1643355 + @pytest.fixture def test_saving(output_directory): events = IceTrackDR2Events.from_event_files(IC40) path = events.to_file(Path(output_directory) / "ic40_events.h5") return (path, events) + def test_overwriting(output_directory, test_saving): path, events = test_saving time_appended = events.to_file(path) @@ -28,7 +30,10 @@ def test_loading(test_saving): path, ic40 = test_saving loaded = IceTrackDR2Events.from_file(path) - assert np.all(np.isclose(loaded.energy.to_value(u.GeV), ic40.energy.to_value(u.GeV))) + assert np.all( + np.isclose(loaded.energies.to_value(u.GeV), ic40.energies.to_value(u.GeV)) + ) + def test_selecting(test_saving): idx = 5 @@ -37,12 +42,12 @@ def test_selecting(test_saving): mask = np.zeros(N, dtype=bool) mask[idx] = True - e = events.energy[idx].to_value(u.GeV) - et = events.int_event_type[idx] + e = events.energies[idx].to_value(u.GeV) + et = events.int_event_types[idx] events.select(mask) - assert pytest.approx(events.energy[0].to_value(u.GeV)) == e - assert et == events.int_event_type[0] + assert pytest.approx(events.energies[0].to_value(u.GeV)) == e + assert et == events.int_event_types[0] def test_erroneous_selecting(test_saving): @@ -53,6 +58,6 @@ def test_erroneous_selecting(test_saving): mask[idx] = True with pytest.raises(ValueError, match="Mask needs to be of the same length as N."): - mask = np.zeros(N+1, dtype=bool) + mask = np.zeros(N + 1, dtype=bool) mask[1] = 1 - events.select(mask) \ No newline at end of file + events.select(mask) From eadcbe773a46244414829464537bf6f4c3cb027c Mon Sep 17 00:00:00 2001 From: Julian Kuhlmann Date: Thu, 28 May 2026 17:17:55 +0200 Subject: [PATCH 12/39] add test files --- tests/test_aeff.py | 0 tests/test_irf.py | 0 2 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 tests/test_aeff.py create mode 100644 tests/test_irf.py diff --git a/tests/test_aeff.py b/tests/test_aeff.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_irf.py b/tests/test_irf.py new file mode 100644 index 0000000..e69de29 From 4078c285db6a25882821dddef74a97ff2f457db9 Mon Sep 17 00:00:00 2001 From: Julian Kuhlmann Date: Fri, 29 May 2026 08:32:19 +0200 Subject: [PATCH 13/39] add mjd generator --- icecube_data_reader/lifetime.py | 31 ++++++++++++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/icecube_data_reader/lifetime.py b/icecube_data_reader/lifetime.py index 0604413..88ac11b 100644 --- a/icecube_data_reader/lifetime.py +++ b/icecube_data_reader/lifetime.py @@ -7,8 +7,15 @@ import os from abc import ABC from astropy import units as u -from icecube_data_reader.downloader import data_directory, I3_14, available_datasets, IceCubeData +from astropy.time import Time +from icecube_data_reader.downloader import ( + data_directory, + I3_14, + available_datasets, + IceCubeData, +) from icecube_data_reader.event_types import IC40, IC59, IC79, IC86, suffixes, EventType +from typing import TypedDict import logging @@ -46,7 +53,9 @@ def lifetime_from_mjd( for s in self._data.keys(): # Query histograms for fraction of total lifetime in a season # multiply by total lifetime in a season to get appropriate value - time = (self._dists[s].cdf(mjd_max) - self._dists[s].cdf(mjd_min)) * self._lifetimes[s] + time = ( + self._dists[s].cdf(mjd_max) - self._dists[s].cdf(mjd_min) + ) * self._lifetimes[s] # set atol to 1e-9 days, so we are below the time resolution of event mjd (1e-8 days) if squeeze and np.isclose(time.to_value(u.d), 0.0, atol=1e-9): time = 0 * u.yr @@ -55,7 +64,9 @@ def lifetime_from_mjd( return output - def lifetime_from_season(self, *seasons) -> dict[EventType | u.quantity.Quantity[u.yr]]: + def lifetime_from_season( + self, *seasons + ) -> dict[EventType | u.quantity.Quantity[u.yr]]: """Compute lifetime of given seasons :param seasons: Seasons to calculate lifetimes of @@ -134,3 +145,17 @@ def __init__(self): on_off[::2] = 1.0 # density=True for on_off to be treated as density self._dists[s] = stats.rv_histogram((on_off, bins), density=True) + + def draw_event_mjd( + self, event_numbers: dict[EventType, int] + ) -> dict[EventType, Time]: + """Draw event MJDs for scrambling the arrival times of events + TODO: add min/max mjd + + :param event_numbers: Dict of event type and requested numbers + :type event_numbers: dict[EventType, int] + :return: Dictionary of event types and new MJDs + :rtype: dict[EventType, Time] + """ + + raise NotImplementedError() From 3c717bad550fcb8d60e3b0fd01ca9362e9506b39 Mon Sep 17 00:00:00 2001 From: Julian Kuhlmann Date: Fri, 29 May 2026 13:04:48 +0200 Subject: [PATCH 14/39] updates --- icecube_data_reader/irf/irf.py | 65 ++++++++++++++++++++++++++++++---- 1 file changed, 59 insertions(+), 6 deletions(-) diff --git a/icecube_data_reader/irf/irf.py b/icecube_data_reader/irf/irf.py index 315f9e9..e381079 100644 --- a/icecube_data_reader/irf/irf.py +++ b/icecube_data_reader/irf/irf.py @@ -8,6 +8,7 @@ import numpy as np from scipy import stats import astropy.units as u +from astropy.coordinates import SkyCoord from icecube_data_reader.downloader import available_datasets, data_directory, I3_14 from icecube_data_reader.event_types import EventType @@ -62,6 +63,7 @@ def _post_init(self): # Create empty array for rv_histograms storing the energy resolution # for each bin of true energy and true declination self.recoE_hists = [None] * self.log_tE_bin_centers.size + self.recoE_sampling = [None] * self.log_tE_bin_centers.size # Use lists here for possibly different numbers of bins for each ereco/psf/angerr histogram self.psf_hists = [[] for _ in self.log_tE_bin_centers] self.psf_bin_edges = [[] for _ in self.log_tE_bin_centers] @@ -83,7 +85,7 @@ def create_eres(self) -> None: """Create energy resolution histograms""" for c_tE in range(self.log_tE_bin_edges.size - 1): - if isinstance(self.recoE_hists[c_tE], stats.rv_histogram): + if isinstance(self.recoE_sampling[c_tE], stats.rv_histogram): continue self._create_recoE_distribution(c_tE) @@ -93,11 +95,11 @@ def create_ang_res(self) -> None: """Create angular resolution histograms""" for c_tE in range(self.log_tE_bin_centers.size): - if not isinstance(self.recoE_hists[c_tE], stats.rv_histogram): + if not isinstance(self.recoE_sampling[c_tE], stats.rv_histogram): frac_counts, bins, ereco_data = self._create_recoE_distribution( c_tE, return_data=True ) - self.recoE_hists[c_tE] = stats.rv_histogram( + self.recoE_sampling[c_tE] = stats.rv_histogram( (frac_counts, bins), density=False ) self.recoE_bin_edges[c_tE] = bins @@ -155,11 +157,57 @@ def load(cls, season: EventType, dec: u.deg) -> Self: irf.tE_bin_edges = tE_bin_edges irf.dec_bin_edges = dec_bin_edges irf.dec_idx = dec_idx + irf.dec_min = dec_bin_edges[dec_idx] * u.deg + irf.dec_max = dec_bin_edges[dec_idx + 1] * u.deg irf._post_init() return irf + @u.quantity_input + def sample_energy(self, coord: SkyCoord, Etrue: u.GeV, seed: int = 42, N: int = 1): + """Simulate events + + :param coord: Source coordinate + :type coord: SkyCoord + :param Etrue: True neutrino energy + :type Etrue: u.GeV + :param seed: Random seed, defaults to 42 + :type seed: int, optional + :return: _description_ + :rtype: _type_ + """ + + if Etrue.shape == () and N > 1: + Etrue = np.full(N, Etrue.to_value(u.GeV)) << u.GeV + else: + Etrue = np.atleast_1d(Etrue.to_value(u.GeV)) << u.GeV + coord.representation_type = "spherical" + ra = coord.ra + dec = coord.dec + if dec > self.dec_max or dec < self.dec_min: + raise ValueError("IRF for DEC={dec} is not constructed.") + + coord.representation_type = "cartesian" + unit_vector = np.array([coord.x, coord.y, coord.z]) + + log_Et = np.log10(Etrue.to_value(u.GeV)) + tE_idx = np.digitize(log_Et, self.log_tE_bin_edges) - 1 + + recoE_out = np.zeros(Etrue.shape) + + set_e = np.unique(tE_idx) + for idx_e in set_e: + _index_e = np.argwhere(idx_e == tE_idx).squeeze() + recoE = self.recoE_sampling[idx_e].rvs( + size=_index_e.size, random_state=seed + ) + recoE_out[_index_e] = recoE + + if recoE_out.size == 1: + return recoE_out[0] + return recoE_out + @profile def _create_recoE_distribution( self, @@ -197,8 +245,11 @@ def _create_recoE_distribution( indices = np.nonzero(np.isclose(b, reduced_data[:, self.ereco_idx])) frac_counts[c_b] = np.sum(reduced_data[indices, -1]) - self.recoE_hists[c_e] = stats.rv_histogram((frac_counts, bins), density=False) + self.recoE_sampling[c_e] = stats.rv_histogram( + (frac_counts, bins), density=False + ) self.recoE_bin_edges[c_e] = bins + self.recoE_hists[c_e] = frac_counts / (frac_counts * np.diff(bins)).sum() if return_data: return frac_counts, bins, reduced_data @@ -259,7 +310,7 @@ def create_angular_distributions( hist = stats.rv_histogram((frac_counts, bins), density=False) self.psf_sampling[c_e][c_rE] = hist - self.psf_hists[c_e][c_rE] = frac_counts / frac_counts.sum() + self.psf_hists[c_e][c_rE] = frac_counts / (frac_counts * np.diff(bins)).sum() if return_data: return frac_counts, bins, reduced_data return frac_counts, bins @@ -301,7 +352,9 @@ def _create_ang_err_distribution( frac_counts[c_b] = np.sum(reduced_data[indices, -1]) hist = stats.rv_histogram((frac_counts, bins), density=False) self.ang_err_sampling[c_e][c_rE][c_psf] = hist - self.ang_err_hists[c_e][c_rE][c_psf] = frac_counts / frac_counts.sum() + self.ang_err_hists[c_e][c_rE][c_psf] = ( + frac_counts / (frac_counts * np.diff(bins)).sum() + ) if __name__ == "__main__": From 0d1efc67db30f81497bc20a47d95d7298bfc965f Mon Sep 17 00:00:00 2001 From: Julian Kuhlmann Date: Fri, 29 May 2026 14:18:07 +0200 Subject: [PATCH 15/39] fix tests, add methods --- icecube_data_reader/irf/irf.py | 4 ++-- icecube_data_reader/lifetime.py | 20 ++++++++++++++++++-- tests/test_downloader.py | 4 ++-- tests/test_lifetime.py | 7 +++++-- 4 files changed, 27 insertions(+), 8 deletions(-) diff --git a/icecube_data_reader/irf/irf.py b/icecube_data_reader/irf/irf.py index e381079..d2b9b06 100644 --- a/icecube_data_reader/irf/irf.py +++ b/icecube_data_reader/irf/irf.py @@ -37,7 +37,7 @@ class AngularResolution(ABC): pass -class IceTrackDR2InstrumentResponseFunction( +class IceTracksDR2InstrumentResponseFunction( InstrumentResponseFunction, EnergyResolution, AngularResolution ): @@ -360,5 +360,5 @@ def _create_ang_err_distribution( if __name__ == "__main__": from icecube_data_reader.event_types import IC86 - irf = IceTrackDR2InstrumentResponseFunction.load(IC86) + irf = IceTracksDR2InstrumentResponseFunction.load(IC86) irf.create_IRF() diff --git a/icecube_data_reader/lifetime.py b/icecube_data_reader/lifetime.py index 88ac11b..21b8735 100644 --- a/icecube_data_reader/lifetime.py +++ b/icecube_data_reader/lifetime.py @@ -58,11 +58,27 @@ def lifetime_from_mjd( ) * self._lifetimes[s] # set atol to 1e-9 days, so we are below the time resolution of event mjd (1e-8 days) if squeeze and np.isclose(time.to_value(u.d), 0.0, atol=1e-9): - time = 0 * u.yr + continue output[s] = time return output + + def mjd_from_season(self, season: EventType) -> tuple[float, float]: + """Return MJD_min, MJD_max for provided season + + :param season: Event type + :type season: EventType + :return: Tuple of MJD_min, MJD_max + :rtype: tuple[float, float] + """ + + for c, v in enumerate([IC40, IC59, IC79, IC86]): + if v == season: + break + + return self._times[c, 0], self._times[c, 1] + def lifetime_from_season( self, *seasons @@ -81,7 +97,7 @@ def lifetime_from_season( return output -class DR2LifeTime(LifeTime): +class IceTrackDR2LifeTime(LifeTime): def __init__(self): directory = available_datasets[I3_14]["dir"] diff --git a/tests/test_downloader.py b/tests/test_downloader.py index 88dc635..7685376 100644 --- a/tests/test_downloader.py +++ b/tests/test_downloader.py @@ -1,8 +1,8 @@ from icecube_data_reader.downloader import IceCubeData, I3_10 - +import pytest data = IceCubeData() - +@pytest.mark.skip("convenience") def test_file_download(output_directory): data.fetch(I3_10, write_to=output_directory) diff --git a/tests/test_lifetime.py b/tests/test_lifetime.py index 603701d..497d78d 100644 --- a/tests/test_lifetime.py +++ b/tests/test_lifetime.py @@ -1,10 +1,10 @@ -from icecube_data_reader.lifetime import DR2LifeTime +from icecube_data_reader.lifetime import IceTrackDR2LifeTime from icecube_data_reader.event_types import DR2, IC40 import numpy as np from astropy import units as u import pytest -lt = DR2LifeTime() +lt = IceTrackDR2LifeTime() def test_total_lt(): @@ -22,3 +22,6 @@ def test_lt_from_mjd(): obs_time = lt.lifetime_from_mjd(mjd_min, mjd_max)[IC40].to_value(u.d) comparison = np.sum(np.diff(lt._data[IC40])[:2]) assert pytest.approx(obs_time) == comparison + +def test_mjd_from_season(): + pass From be915a48e6ea2b66762fdfdadfd9d057b71fcae3 Mon Sep 17 00:00:00 2001 From: Julian Kuhlmann Date: Fri, 29 May 2026 14:32:48 +0200 Subject: [PATCH 16/39] add check for completely empty IRF entries --- icecube_data_reader/irf/irf.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/icecube_data_reader/irf/irf.py b/icecube_data_reader/irf/irf.py index d2b9b06..0e89959 100644 --- a/icecube_data_reader/irf/irf.py +++ b/icecube_data_reader/irf/irf.py @@ -43,6 +43,8 @@ class IceTracksDR2InstrumentResponseFunction( def __init__(self, data: np.ndarray, season: EventType): """ + DO NOT instantiate it via init, but rather through the class method `load` + :param path: Path to smearing matrix :type path: Path :param season: Detector season @@ -72,6 +74,12 @@ def _post_init(self): self.ang_err_bin_edges = [[] for _ in self.log_tE_bin_centers] self.ang_err_sampling = [[] for _ in self.log_tE_bin_centers] + self.faulty = [] + for c_e in range(self.log_tE_bin_center.size): + reduced = self.data[self.data[:, 0] == self.log_tE_bin_edges[c_e]] + if reduced[:, -1].sum() == 0.: + self.faulty.append(c_e) + @u.quantity_input @profile def create_IRF(self) -> None: From 669a99012f3f87d6c2e3131ce0f8dca863aa886e Mon Sep 17 00:00:00 2001 From: Julian Kuhlmann Date: Fri, 29 May 2026 15:38:50 +0200 Subject: [PATCH 17/39] minor changes --- icecube_data_reader/irf/effective_area.py | 2 +- icecube_data_reader/irf/irf.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/icecube_data_reader/irf/effective_area.py b/icecube_data_reader/irf/effective_area.py index e3736ef..5c46d07 100644 --- a/icecube_data_reader/irf/effective_area.py +++ b/icecube_data_reader/irf/effective_area.py @@ -94,7 +94,7 @@ def load(cls, season: EventType, data_path: Path = data_directory) -> Self: """ season_string = str(season) - file = data_directory / Path( + file = data_path / Path( available_datasets[I3_14]["dir"] ) / Path( available_datasets[I3_14]["subdir"] diff --git a/icecube_data_reader/irf/irf.py b/icecube_data_reader/irf/irf.py index 0e89959..edbcf59 100644 --- a/icecube_data_reader/irf/irf.py +++ b/icecube_data_reader/irf/irf.py @@ -164,6 +164,8 @@ def load(cls, season: EventType, dec: u.deg) -> Self: irf.log_tE_bin_centers = log_tE_bin_centers irf.tE_bin_edges = tE_bin_edges irf.dec_bin_edges = dec_bin_edges + irf.sin_dec_bin_edges = np.sin(np.deg2rad(dec_bin_edges)) + irf.sin_dec_bin_centers = (irf.sin_dec_bin_edges[:-1] + irf.sin_dec_bin_edges[1:]) / 2 irf.dec_idx = dec_idx irf.dec_min = dec_bin_edges[dec_idx] * u.deg irf.dec_max = dec_bin_edges[dec_idx + 1] * u.deg From fc4da15c829fedc7ebdb729d2daf0df8185bd3f3 Mon Sep 17 00:00:00 2001 From: Julian Kuhlmann Date: Fri, 29 May 2026 17:44:48 +0200 Subject: [PATCH 18/39] add dec again... --- icecube_data_reader/irf/irf.py | 167 +++++++++++++++++++-------------- 1 file changed, 95 insertions(+), 72 deletions(-) diff --git a/icecube_data_reader/irf/irf.py b/icecube_data_reader/irf/irf.py index edbcf59..07caddc 100644 --- a/icecube_data_reader/irf/irf.py +++ b/icecube_data_reader/irf/irf.py @@ -5,6 +5,7 @@ from abc import ABC, abstractmethod from pathlib import Path from typing import Self +from itertools import pairwise import numpy as np from scipy import stats import astropy.units as u @@ -12,6 +13,7 @@ from icecube_data_reader.downloader import available_datasets, data_directory, I3_14 from icecube_data_reader.event_types import EventType +from icecube_data_reader.utils.utils import DummyPDF import logging @@ -55,30 +57,36 @@ def __init__(self, data: np.ndarray, season: EventType): self.season = season self.etrue_idx = 0 - self.ereco_idx = 2 - self.psf_idx = 4 - self.ang_err_idx = 6 + self.dec_idx = 2 + self.ereco_idx = 4 + self.psf_idx = 6 + self.ang_err_idx = 8 def _post_init(self): # Break naming convention because r and t are too close on the keyboard - self.recoE_bin_edges = [None] * self.log_tE_bin_centers.size + self.recoE_bin_edges = [[None] * self.sin_dec_bin_centers.size] * self.log_tE_bin_centers.size # Create empty array for rv_histograms storing the energy resolution # for each bin of true energy and true declination - self.recoE_hists = [None] * self.log_tE_bin_centers.size - self.recoE_sampling = [None] * self.log_tE_bin_centers.size + self.recoE_hists = [[None] * self.sin_dec_bin_centers.size] * self.log_tE_bin_centers.size + self.recoE_sampling = [[None] * self.sin_dec_bin_centers.size] * self.log_tE_bin_centers.size # Use lists here for possibly different numbers of bins for each ereco/psf/angerr histogram - self.psf_hists = [[] for _ in self.log_tE_bin_centers] - self.psf_bin_edges = [[] for _ in self.log_tE_bin_centers] - self.psf_sampling = [[] for _ in self.log_tE_bin_centers] - self.ang_err_hists = [[] for _ in self.log_tE_bin_centers] - self.ang_err_bin_edges = [[] for _ in self.log_tE_bin_centers] - self.ang_err_sampling = [[] for _ in self.log_tE_bin_centers] + self.psf_hists = [[[] for _ in self.sin_dec_bin_centers] for _ in self.log_tE_bin_centers] + self.psf_bin_edges = [[[] for _ in self.sin_dec_bin_centers] for _ in self.log_tE_bin_centers] + self.psf_sampling = [[[] for _ in self.sin_dec_bin_centers] for _ in self.log_tE_bin_centers] + self.ang_err_hists = [[[] for _ in self.sin_dec_bin_centers] for _ in self.log_tE_bin_centers] + self.ang_err_bin_edges = [[[] for _ in self.sin_dec_bin_centers] for _ in self.log_tE_bin_centers] + self.ang_err_sampling = [[[] for _ in self.sin_dec_bin_centers] for _ in self.log_tE_bin_centers] self.faulty = [] - for c_e in range(self.log_tE_bin_center.size): - reduced = self.data[self.data[:, 0] == self.log_tE_bin_edges[c_e]] - if reduced[:, -1].sum() == 0.: - self.faulty.append(c_e) + for c_e in range(self.log_tE_bin_centers.size): + for c_d, d_l in enumerate(self.dec_bin_edges[:-1]): + idx = np.argwhere( + (self.data[:, 0] == self.log_tE_bin_edges[c_e]) + * (self.data[:, 2] == d_l) + ).squeeze() + reduced = self.data[idx] + if reduced[:, -1].sum() == 0.: + self.faulty.append((c_e, c_d)) @u.quantity_input @profile @@ -92,10 +100,12 @@ def create_IRF(self) -> None: def create_eres(self) -> None: """Create energy resolution histograms""" - for c_tE in range(self.log_tE_bin_edges.size - 1): - if isinstance(self.recoE_sampling[c_tE], stats.rv_histogram): - continue - self._create_recoE_distribution(c_tE) + for c_tE in range(self.log_tE_bin_centers.size): + for c_d in range(self.sin_dec_bin_centers.size): + if isinstance(self.recoE_sampling[c_tE][c_d], stats.rv_histogram) \ + or isinstance(self.recoE_sampling[c_tE][c_d], DummyPDF): + continue + self._create_recoE_distribution(c_tE, c_d) @u.quantity_input @profile @@ -103,30 +113,31 @@ def create_ang_res(self) -> None: """Create angular resolution histograms""" for c_tE in range(self.log_tE_bin_centers.size): - if not isinstance(self.recoE_sampling[c_tE], stats.rv_histogram): - frac_counts, bins, ereco_data = self._create_recoE_distribution( - c_tE, return_data=True - ) - self.recoE_sampling[c_tE] = stats.rv_histogram( - (frac_counts, bins), density=False - ) - self.recoE_bin_edges[c_tE] = bins - else: - ereco_data = None - bins = self.recoE_bin_edges[c_tE] - # bins is reco energy, each bin is mapped to a PSF distribution - self.psf_hists[c_tE] = [None] * (bins.size - 1) - self.psf_bin_edges[c_tE] = [None] * (bins.size - 1) - self.psf_sampling[c_tE] = [None] * (bins.size - 1) - - self.ang_err_hists[c_tE] = [None] * (bins.size - 1) - self.ang_err_bin_edges[c_tE] = [None] * (bins.size - 1) - self.ang_err_sampling[c_tE] = [None] * (bins.size - 1) - for c_rE in range(bins.size - 1): - self.create_angular_distributions(c_tE, c_rE, data=ereco_data) + for c_d in range(self.sin_dec_bin_centers.size): + if not isinstance(self.recoE_sampling[c_tE][c_d], stats.rv_histogram): + frac_counts, bins, ereco_data = self._create_recoE_distribution( + c_tE, c_d, return_data=True + ) + self.recoE_sampling[c_tE][c_d] = stats.rv_histogram( + (frac_counts, bins), density=False + ) + self.recoE_bin_edges[c_tE][c_d] = bins + else: + ereco_data = None + bins = self.recoE_bin_edges[c_tE][c_d] + # bins is reco energy, each bin is mapped to a PSF distribution + self.psf_hists[c_tE][c_d] = [None] * (bins.size - 1) + self.psf_bin_edges[c_tE][c_d] = [None] * (bins.size - 1) + self.psf_sampling[c_tE][c_d] = [None] * (bins.size - 1) + + self.ang_err_hists[c_tE][c_d] = [None] * (bins.size - 1) + self.ang_err_bin_edges[c_tE][c_d] = [None] * (bins.size - 1) + self.ang_err_sampling[c_tE][c_d] = [None] * (bins.size - 1) + for c_rE in range(bins.size - 1): + self.create_angular_distributions(c_tE, c_d, c_rE, data=ereco_data) @classmethod - def load(cls, season: EventType, dec: u.deg) -> Self: + def load(cls, season: EventType) -> Self: """Create energy resolution object for provided season :param season: Season @@ -154,11 +165,11 @@ def load(cls, season: EventType, dec: u.deg) -> Self: tE_bin_edges = np.power(10, log_tE_bin_edges) << u.GeV dec_bin_edges = np.sort(np.unique(data[:, 2:4].flatten())) - dec_idx = np.digitize(dec.to_value(u.deg), dec_bin_edges) - 1 - data = data[data[:, 2] == dec_bin_edges[dec_idx]] + #dec_idx = np.digitize(dec.to_value(u.deg), dec_bin_edges) - 1 + #data = data[data[:, 2] == dec_bin_edges[dec_idx]] # Remove declination bc we only use one declination bin - mask = [True, True, False, False, True, True, True, True, True, True, True] - data = data[:, mask] + #mask = [True, True, False, False, True, True, True, True, True, True, True] + #data = data[:, mask] irf = cls(data, season) irf.log_tE_bin_edges = log_tE_bin_edges irf.log_tE_bin_centers = log_tE_bin_centers @@ -166,9 +177,9 @@ def load(cls, season: EventType, dec: u.deg) -> Self: irf.dec_bin_edges = dec_bin_edges irf.sin_dec_bin_edges = np.sin(np.deg2rad(dec_bin_edges)) irf.sin_dec_bin_centers = (irf.sin_dec_bin_edges[:-1] + irf.sin_dec_bin_edges[1:]) / 2 - irf.dec_idx = dec_idx - irf.dec_min = dec_bin_edges[dec_idx] * u.deg - irf.dec_max = dec_bin_edges[dec_idx + 1] * u.deg + #irf.dec_idx = dec_idx + #irf.dec_min = dec_bin_edges[dec_idx] * u.deg + #irf.dec_max = dec_bin_edges[dec_idx + 1] * u.deg irf._post_init() @@ -178,7 +189,8 @@ def load(cls, season: EventType, dec: u.deg) -> Self: def sample_energy(self, coord: SkyCoord, Etrue: u.GeV, seed: int = 42, N: int = 1): """Simulate events - :param coord: Source coordinate + :param coord: Source coordinate, + assumes only one coordinate per function call :type coord: SkyCoord :param Etrue: True neutrino energy :type Etrue: u.GeV @@ -189,27 +201,26 @@ def sample_energy(self, coord: SkyCoord, Etrue: u.GeV, seed: int = 42, N: int = """ if Etrue.shape == () and N > 1: - Etrue = np.full(N, Etrue.to_value(u.GeV)) << u.GeV + Etrue = np.full(N, Etrue.to_value(u.GeV)) else: - Etrue = np.atleast_1d(Etrue.to_value(u.GeV)) << u.GeV + Etrue = np.atleast_1d(Etrue.to_value(u.GeV)) coord.representation_type = "spherical" ra = coord.ra dec = coord.dec - if dec > self.dec_max or dec < self.dec_min: - raise ValueError("IRF for DEC={dec} is not constructed.") + c_d = np.digitize(dec.deg, self.dec_bin_edges) - 1 coord.representation_type = "cartesian" unit_vector = np.array([coord.x, coord.y, coord.z]) - log_Et = np.log10(Etrue.to_value(u.GeV)) - tE_idx = np.digitize(log_Et, self.log_tE_bin_edges) - 1 + log_tE = np.log10(Etrue) + tE_idx = np.digitize(log_tE, self.log_tE_bin_edges) - 1 recoE_out = np.zeros(Etrue.shape) set_e = np.unique(tE_idx) for idx_e in set_e: - _index_e = np.argwhere(idx_e == tE_idx).squeeze() - recoE = self.recoE_sampling[idx_e].rvs( + _index_e = np.argwhere(idx_e == tE_idx) + recoE = self.recoE_sampling[idx_e][c_d].rvs( size=_index_e.size, random_state=seed ) recoE_out[_index_e] = recoE @@ -222,6 +233,7 @@ def sample_energy(self, coord: SkyCoord, Etrue: u.GeV, seed: int = 42, N: int = def _create_recoE_distribution( self, c_e: int, + c_d: int, return_data: bool = False, ) -> tuple[np.ndarray, ...]: """Creates the reconstructed energy distribution for given true @@ -242,6 +254,7 @@ def _create_recoE_distribution( reduced_data = self.data[ self.data[:, self.etrue_idx] == self.log_tE_bin_edges[c_e] ] + reduced_data = reduced_data[reduced_data[:, self.dec_idx] == self.dec_bin_edges[c_d]] # Create bin edges of reco energy bins = np.sort( @@ -255,11 +268,11 @@ def _create_recoE_distribution( indices = np.nonzero(np.isclose(b, reduced_data[:, self.ereco_idx])) frac_counts[c_b] = np.sum(reduced_data[indices, -1]) - self.recoE_sampling[c_e] = stats.rv_histogram( + self.recoE_sampling[c_e][c_d] = stats.rv_histogram( (frac_counts, bins), density=False ) - self.recoE_bin_edges[c_e] = bins - self.recoE_hists[c_e] = frac_counts / (frac_counts * np.diff(bins)).sum() + self.recoE_bin_edges[c_e][c_d] = bins + self.recoE_hists[c_e][c_d] = frac_counts / (frac_counts * np.diff(bins)).sum() if return_data: return frac_counts, bins, reduced_data @@ -269,6 +282,7 @@ def _create_recoE_distribution( def create_angular_distributions( self, c_e: int, + c_d: int, c_rE: int, data: None | np.ndarray = None, return_data: bool = False, @@ -291,13 +305,17 @@ def create_angular_distributions( if data is None: # Get entries at relevant true energy and declination reduced_data = self.data[self.data[:, 0] == self.log_tE_bin_edges[c_e]] + reduced_data = reduced_data[ + reduced_data[:, self.dec_idx] == self.dec_bin_edges[c_d] + ] + else: reduced_data = data # Get entries at relevant reco energy reduced_data = reduced_data[ - reduced_data[:, 2] == self.recoE_bin_edges[c_e][c_rE] + reduced_data[:, self.ereco_idx] == self.recoE_bin_edges[c_e][c_d][c_rE] ] # Create bin edges of kinematic angle / PSF @@ -305,22 +323,26 @@ def create_angular_distributions( np.unique(reduced_data[:, self.psf_idx : self.psf_idx + 2].flatten()) ) - self.psf_bin_edges[c_e][c_rE] = bins - self.ang_err_sampling[c_e][c_rE] = [None] * (bins.size - 1) - self.ang_err_hists[c_e][c_rE] = [None] * (bins.size - 1) - self.ang_err_bin_edges[c_e][c_rE] = [None] * (bins.size - 1) + if bins.size == 0: + #self.psf_bin_edges[c_e][c_d][c_rE] = np.arange(21) + #self.psf_hists[c_e][c_d][c_rE] = np.zeros(20) + return frac_counts = np.zeros(bins.size - 1) + self.psf_bin_edges[c_e][c_d][c_rE] = bins + self.ang_err_sampling[c_e][c_d][c_rE] = [None] * (bins.size - 1) + self.ang_err_hists[c_e][c_d][c_rE] = [None] * (bins.size - 1) + self.ang_err_bin_edges[c_e][c_d][c_rE] = [None] * (bins.size - 1) # Calculate fractional count in each PSF bin by marginalising over ang_err for c_b, b in enumerate(bins[:-1]): indices = np.nonzero(np.isclose(b, reduced_data[:, self.psf_idx])) frac_counts[c_b] = np.sum(reduced_data[indices, -1]) psf_reduced_data = reduced_data[reduced_data[:, self.psf_idx] == b] - self._create_ang_err_distribution(c_e, c_rE, c_b, psf_reduced_data) + self._create_ang_err_distribution(c_e, c_d, c_rE, c_b, psf_reduced_data) hist = stats.rv_histogram((frac_counts, bins), density=False) - self.psf_sampling[c_e][c_rE] = hist - self.psf_hists[c_e][c_rE] = frac_counts / (frac_counts * np.diff(bins)).sum() + self.psf_sampling[c_e][c_d][c_rE] = hist + self.psf_hists[c_e][c_d][c_rE] = frac_counts / (frac_counts * np.diff(bins)).sum() if return_data: return frac_counts, bins, reduced_data return frac_counts, bins @@ -329,6 +351,7 @@ def create_angular_distributions( def _create_ang_err_distribution( self, c_e: int, + c_d: int, c_rE: int, c_psf: int, reduced_data: np.ndarray, @@ -354,15 +377,15 @@ def _create_ang_err_distribution( reduced_data[:, self.ang_err_idx : self.ang_err_idx + 2].flatten() ) ) - self.ang_err_bin_edges[c_e][c_rE][c_psf] = bins + self.ang_err_bin_edges[c_e][c_d][c_rE][c_psf] = bins frac_counts = np.zeros(bins.size - 1) for c_b, b in enumerate(bins[:-1]): indices = np.nonzero(np.isclose(b, reduced_data[:, self.ang_err_idx])) frac_counts[c_b] = np.sum(reduced_data[indices, -1]) hist = stats.rv_histogram((frac_counts, bins), density=False) - self.ang_err_sampling[c_e][c_rE][c_psf] = hist - self.ang_err_hists[c_e][c_rE][c_psf] = ( + self.ang_err_sampling[c_e][c_d][c_rE][c_psf] = hist + self.ang_err_hists[c_e][c_d][c_rE][c_psf] = ( frac_counts / (frac_counts * np.diff(bins)).sum() ) From 85d6dfdd87f41961893e3cebbc3133062386f777 Mon Sep 17 00:00:00 2001 From: Julian Kuhlmann Date: Sat, 30 May 2026 16:59:17 +0200 Subject: [PATCH 19/39] small changes --- icecube_data_reader/event_types.py | 4 +++ icecube_data_reader/events.py | 41 ++++++++++++----------- icecube_data_reader/irf/effective_area.py | 25 ++++++++------ icecube_data_reader/lifetime.py | 18 +++++----- 4 files changed, 49 insertions(+), 39 deletions(-) diff --git a/icecube_data_reader/event_types.py b/icecube_data_reader/event_types.py index d4f9774..55667ba 100644 --- a/icecube_data_reader/event_types.py +++ b/icecube_data_reader/event_types.py @@ -136,4 +136,8 @@ def int2dm(cls, int_): return dm else: raise ValueError(f"No detector {int_} available.") + + @classmethod + def str2dm(cls, str_): + return cls.int2dm(cls.str2int(str_)) diff --git a/icecube_data_reader/events.py b/icecube_data_reader/events.py index 169d873..21fa077 100644 --- a/icecube_data_reader/events.py +++ b/icecube_data_reader/events.py @@ -75,16 +75,16 @@ def unit_vectors(self): return self._unit_vectors @property - def event_types(self): - return self._event_types + def types(self): + return self._types @property - def int_event_types(self): - return self._int_event_types + def int_types(self): + return self._int_types @property def N(self): - return self.event_types.size + return self.types.size @u.quantity_input def apply_energy_cut(self, Emin: u.GeV, Emax: u.GeV = np.inf * u.GeV): @@ -127,14 +127,14 @@ def __init__( self, energies: u.GeV, coords: SkyCoord, - event_types: np.ndarray, + types: np.ndarray, ang_errs: u.deg, mjd: Time, ): self._energies = energies self._coords = coords - self._event_types = event_types - self._int_event_types = np.array([_.S for _ in event_types]) + self._types = types + self._int_types = np.array([_.S for _ in types]) self._ang_errs = ang_errs self._mjd = mjd self._coords.representation_type = "cartesian" @@ -180,8 +180,8 @@ def select(self, mask: npt.NDArray[np.bool_]) -> None: self._energies = self._energies[mask] self._coords = self._coords[mask] self._unit_vectors = self._unit_vectors[mask] - self._event_types = self._event_types[mask] - self._int_event_types = self._int_event_types[mask] + self._types = self._types[mask] + self._int_types = self._int_types[mask] self._ang_errs = self._ang_errs[mask] self._mjd = self._mjd[mask] @@ -222,11 +222,11 @@ def to_file( :rtype: Path """ - self._file_keys = ["energies", "unit_vectors", "event_types", "ang_errs", "mjd"] + self._file_keys = ["energies", "unit_vectors", "types", "ang_errs", "mjd"] self._file_values = [ self.energies.to(u.GeV).value, self.unit_vectors, - self.int_event_types, + self.int_types, self.ang_errs.to(u.deg).value, self.mjd.mjd, ] @@ -293,8 +293,8 @@ def from_file( energies = events_folder["energies"][()] * u.GeV uv = events_folder["unit_vectors"][()] - int_event_types = events_folder["event_types"][()] - event_types = np.array([Refrigerator.int2dm(_) for _ in int_event_types]) + int_types = events_folder["types"][()] + types = np.array([Refrigerator.int2dm(_) for _ in int_types]) ang_errs = events_folder["ang_errs"][()] * u.deg # For backwards compatibility @@ -308,7 +308,7 @@ def from_file( ) mjd = Time(mjd, format="mjd") coords.representation_type = "spherical" - events = cls(energies, coords, event_types, ang_errs, mjd) + events = cls(energies, coords, types, ang_errs, mjd) return events @@ -325,7 +325,7 @@ def apply_energy_cut(self, Emin: u.GeV, Emax=np.inf * u.GeV) -> None: self.select(mask) @classmethod - def from_event_files(cls, *seasons: EventType) -> Self: + def from_event_files(cls, *seasons: EventType | str) -> Self: """ Load data of provided seasons. If none are provided, use all. @@ -361,7 +361,7 @@ def from_event_files(cls, *seasons: EventType) -> Self: decs = [] mjd = [] ang_errs = [] - event_types = [] + types = [] def _append_data(s, suffering: str = ""): data = np.loadtxt( @@ -376,9 +376,10 @@ def _append_data(s, suffering: str = ""): ras.append(data[:, cls.ras_]) decs.append(data[:, cls.decs_]) ang_errs.append(data[:, cls.ang_errs_]) - event_types.append(len(data[:, cls.energies_]) * [s]) + types.append(len(data[:, cls.energies_]) * [s]) for s in seasons: + s = Refrigerator.str2dm(s) if isinstance(s, str) else s if s == IC86: for suffering in suffixes: _append_data(s, suffering) @@ -390,10 +391,10 @@ def _append_data(s, suffering: str = ""): ras = np.concatenate(ras) << u.deg decs = np.concatenate(decs) << u.deg ang_errs = np.concatenate(ang_errs) << u.deg - event_types = np.concatenate(event_types) + types = np.concatenate(types) coords = SkyCoord(ra=ras, dec=decs, frame="icrs") - events = cls(energies, coords, event_types, ang_errs, mjd) + events = cls(energies, coords, types, ang_errs, mjd) events._ras = ras events._decs = decs diff --git a/icecube_data_reader/irf/effective_area.py b/icecube_data_reader/irf/effective_area.py index 5c46d07..213b7db 100644 --- a/icecube_data_reader/irf/effective_area.py +++ b/icecube_data_reader/irf/effective_area.py @@ -46,14 +46,14 @@ def cosz_bin_edges(self): return self._cosz_bin_edges - @property - def sin_dec_bin_edges(self): - """ - sin(dec) bin edges corresponding to the - histogram in eff_area - """ - - return self._sin_dec_bin_edges + #@property + #def sin_dec_bin_edges(self): + # """ + # sin(dec) bin edges corresponding to the + # histogram in eff_area + # """ + # + # return self._sin_dec_bin_edges @property def tE_bin_edges(self): @@ -76,10 +76,13 @@ def __init__( season: EventType, ): + self._season = season self._tE_bin_edges = tE_bin_edges - self._eff_area = eff_area - self._sin_dec_bin_edges = np.sin(dec_bin_edges.to_value(u.rad)) - self._cosz_bin_edges = - np.sin(dec_bin_edges.to_value(u.rad)) + # this inverts the order of bins + self._cosz_bin_edges = np.sort(- np.sin(dec_bin_edges.to_value(u.rad))) + # hence we invert the order of elements along the cosz axis of eff_area + self._eff_area = np.flip(eff_area, axis=1) + #self._sin_dec_bin_edges = np.sin(dec_bin_edges.to_value(u.rad)) @classmethod def load(cls, season: EventType, data_path: Path = data_directory) -> Self: diff --git a/icecube_data_reader/lifetime.py b/icecube_data_reader/lifetime.py index 21b8735..d743622 100644 --- a/icecube_data_reader/lifetime.py +++ b/icecube_data_reader/lifetime.py @@ -14,8 +14,7 @@ available_datasets, IceCubeData, ) -from icecube_data_reader.event_types import IC40, IC59, IC79, IC86, suffixes, EventType -from typing import TypedDict +from icecube_data_reader.event_types import IC40, IC59, IC79, IC86, suffixes, EventType, Refrigerator import logging @@ -64,34 +63,37 @@ def lifetime_from_mjd( return output - def mjd_from_season(self, season: EventType) -> tuple[float, float]: + def mjd_from_season(self, season: EventType | str) -> tuple[float, float]: """Return MJD_min, MJD_max for provided season - :param season: Event type - :type season: EventType + :param season: Event type, either the EventType class or str(EventType) + :type season: EventType | str :return: Tuple of MJD_min, MJD_max :rtype: tuple[float, float] """ for c, v in enumerate([IC40, IC59, IC79, IC86]): - if v == season: + if str(v) == str(season): break return self._times[c, 0], self._times[c, 1] def lifetime_from_season( - self, *seasons + self, *seasons: EventType | str ) -> dict[EventType | u.quantity.Quantity[u.yr]]: """Compute lifetime of given seasons - :param seasons: Seasons to calculate lifetimes of + :param seasons: Seasons to calculate lifetimes of, either EventType or str(EventType) + :type seasons: EventType | str :return: Dictionary with detector season: duration in astropy.units.yr :rtype: dict[EventType|u.quantity.Quantity[u.yr]] """ output = {} for s in seasons: + if isinstance(s, str): + s = Refrigerator.str2dm(s) output[s] = self._lifetimes[s] return output From 1dd241387a390c283fb87caa496b83773ae3cfec Mon Sep 17 00:00:00 2001 From: Julian Kuhlmann Date: Sat, 30 May 2026 17:00:19 +0200 Subject: [PATCH 20/39] fix tests --- tests/test_events.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_events.py b/tests/test_events.py index 0ed7bfe..879d81f 100644 --- a/tests/test_events.py +++ b/tests/test_events.py @@ -43,11 +43,11 @@ def test_selecting(test_saving): mask[idx] = True e = events.energies[idx].to_value(u.GeV) - et = events.int_event_types[idx] + et = events.int_types[idx] events.select(mask) assert pytest.approx(events.energies[0].to_value(u.GeV)) == e - assert et == events.int_event_types[0] + assert et == events.int_types[0] def test_erroneous_selecting(test_saving): From 676ac738accb91e3e9617b5d5759ac60e1a36938 Mon Sep 17 00:00:00 2001 From: Julian Kuhlmann Date: Mon, 1 Jun 2026 12:02:32 +0200 Subject: [PATCH 21/39] marginal gains, add test --- icecube_data_reader/irf/irf.py | 114 +++++++++++++++++++++------------ tests/test_irf.py | 28 ++++++++ 2 files changed, 102 insertions(+), 40 deletions(-) diff --git a/icecube_data_reader/irf/irf.py b/icecube_data_reader/irf/irf.py index 07caddc..b154632 100644 --- a/icecube_data_reader/irf/irf.py +++ b/icecube_data_reader/irf/irf.py @@ -64,18 +64,36 @@ def __init__(self, data: np.ndarray, season: EventType): def _post_init(self): # Break naming convention because r and t are too close on the keyboard - self.recoE_bin_edges = [[None] * self.sin_dec_bin_centers.size] * self.log_tE_bin_centers.size + self.recoE_bin_edges = [ + [None] * self.sin_dec_bin_centers.size + ] * self.log_tE_bin_centers.size # Create empty array for rv_histograms storing the energy resolution # for each bin of true energy and true declination - self.recoE_hists = [[None] * self.sin_dec_bin_centers.size] * self.log_tE_bin_centers.size - self.recoE_sampling = [[None] * self.sin_dec_bin_centers.size] * self.log_tE_bin_centers.size + self.recoE_hists = [ + [None] * self.sin_dec_bin_centers.size + ] * self.log_tE_bin_centers.size + self.recoE_sampling = [ + [None] * self.sin_dec_bin_centers.size + ] * self.log_tE_bin_centers.size # Use lists here for possibly different numbers of bins for each ereco/psf/angerr histogram - self.psf_hists = [[[] for _ in self.sin_dec_bin_centers] for _ in self.log_tE_bin_centers] - self.psf_bin_edges = [[[] for _ in self.sin_dec_bin_centers] for _ in self.log_tE_bin_centers] - self.psf_sampling = [[[] for _ in self.sin_dec_bin_centers] for _ in self.log_tE_bin_centers] - self.ang_err_hists = [[[] for _ in self.sin_dec_bin_centers] for _ in self.log_tE_bin_centers] - self.ang_err_bin_edges = [[[] for _ in self.sin_dec_bin_centers] for _ in self.log_tE_bin_centers] - self.ang_err_sampling = [[[] for _ in self.sin_dec_bin_centers] for _ in self.log_tE_bin_centers] + self.psf_hists = [ + [[] for _ in self.sin_dec_bin_centers] for _ in self.log_tE_bin_centers + ] + self.psf_bin_edges = [ + [[] for _ in self.sin_dec_bin_centers] for _ in self.log_tE_bin_centers + ] + self.psf_sampling = [ + [[] for _ in self.sin_dec_bin_centers] for _ in self.log_tE_bin_centers + ] + self.ang_err_hists = [ + [[] for _ in self.sin_dec_bin_centers] for _ in self.log_tE_bin_centers + ] + self.ang_err_bin_edges = [ + [[] for _ in self.sin_dec_bin_centers] for _ in self.log_tE_bin_centers + ] + self.ang_err_sampling = [ + [[] for _ in self.sin_dec_bin_centers] for _ in self.log_tE_bin_centers + ] self.faulty = [] for c_e in range(self.log_tE_bin_centers.size): @@ -85,7 +103,7 @@ def _post_init(self): * (self.data[:, 2] == d_l) ).squeeze() reduced = self.data[idx] - if reduced[:, -1].sum() == 0.: + if reduced[:, -1].sum() == 0.0: self.faulty.append((c_e, c_d)) @u.quantity_input @@ -102,8 +120,9 @@ def create_eres(self) -> None: for c_tE in range(self.log_tE_bin_centers.size): for c_d in range(self.sin_dec_bin_centers.size): - if isinstance(self.recoE_sampling[c_tE][c_d], stats.rv_histogram) \ - or isinstance(self.recoE_sampling[c_tE][c_d], DummyPDF): + if isinstance( + self.recoE_sampling[c_tE][c_d], stats.rv_histogram + ) or isinstance(self.recoE_sampling[c_tE][c_d], DummyPDF): continue self._create_recoE_distribution(c_tE, c_d) @@ -165,21 +184,26 @@ def load(cls, season: EventType) -> Self: tE_bin_edges = np.power(10, log_tE_bin_edges) << u.GeV dec_bin_edges = np.sort(np.unique(data[:, 2:4].flatten())) - #dec_idx = np.digitize(dec.to_value(u.deg), dec_bin_edges) - 1 - #data = data[data[:, 2] == dec_bin_edges[dec_idx]] + # use log binning for angular quantities + data[:, 6:-1] = np.log10(data[:, 6:-1]) + + # dec_idx = np.digitize(dec.to_value(u.deg), dec_bin_edges) - 1 + # data = data[data[:, 2] == dec_bin_edges[dec_idx]] # Remove declination bc we only use one declination bin - #mask = [True, True, False, False, True, True, True, True, True, True, True] - #data = data[:, mask] + # mask = [True, True, False, False, True, True, True, True, True, True, True] + # data = data[:, mask] irf = cls(data, season) irf.log_tE_bin_edges = log_tE_bin_edges irf.log_tE_bin_centers = log_tE_bin_centers irf.tE_bin_edges = tE_bin_edges irf.dec_bin_edges = dec_bin_edges irf.sin_dec_bin_edges = np.sin(np.deg2rad(dec_bin_edges)) - irf.sin_dec_bin_centers = (irf.sin_dec_bin_edges[:-1] + irf.sin_dec_bin_edges[1:]) / 2 - #irf.dec_idx = dec_idx - #irf.dec_min = dec_bin_edges[dec_idx] * u.deg - #irf.dec_max = dec_bin_edges[dec_idx + 1] * u.deg + irf.sin_dec_bin_centers = ( + irf.sin_dec_bin_edges[:-1] + irf.sin_dec_bin_edges[1:] + ) / 2 + # irf.dec_idx = dec_idx + # irf.dec_min = dec_bin_edges[dec_idx] * u.deg + # irf.dec_max = dec_bin_edges[dec_idx + 1] * u.deg irf._post_init() @@ -211,6 +235,7 @@ def sample_energy(self, coord: SkyCoord, Etrue: u.GeV, seed: int = 42, N: int = coord.representation_type = "cartesian" unit_vector = np.array([coord.x, coord.y, coord.z]) + coord.representation_type = "spherical" log_tE = np.log10(Etrue) tE_idx = np.digitize(log_tE, self.log_tE_bin_edges) - 1 @@ -219,7 +244,7 @@ def sample_energy(self, coord: SkyCoord, Etrue: u.GeV, seed: int = 42, N: int = set_e = np.unique(tE_idx) for idx_e in set_e: - _index_e = np.argwhere(idx_e == tE_idx) + _index_e = np.argwhere(idx_e == tE_idx).squeeze() recoE = self.recoE_sampling[idx_e][c_d].rvs( size=_index_e.size, random_state=seed ) @@ -254,7 +279,9 @@ def _create_recoE_distribution( reduced_data = self.data[ self.data[:, self.etrue_idx] == self.log_tE_bin_edges[c_e] ] - reduced_data = reduced_data[reduced_data[:, self.dec_idx] == self.dec_bin_edges[c_d]] + reduced_data = reduced_data[ + reduced_data[:, self.dec_idx] == self.dec_bin_edges[c_d] + ] # Create bin edges of reco energy bins = np.sort( @@ -265,14 +292,15 @@ def _create_recoE_distribution( # marginalise over angular quantities for c_b, b in enumerate(bins[:-1]): - indices = np.nonzero(np.isclose(b, reduced_data[:, self.ereco_idx])) - frac_counts[c_b] = np.sum(reduced_data[indices, -1]) + frac_counts[c_b] = np.sum( + reduced_data[b == reduced_data[:, self.ereco_idx], -1] + ) self.recoE_sampling[c_e][c_d] = stats.rv_histogram( (frac_counts, bins), density=False ) self.recoE_bin_edges[c_e][c_d] = bins - self.recoE_hists[c_e][c_d] = frac_counts / (frac_counts * np.diff(bins)).sum() + self.recoE_hists[c_e][c_d] = frac_counts / frac_counts.sum() if return_data: return frac_counts, bins, reduced_data @@ -309,7 +337,6 @@ def create_angular_distributions( reduced_data[:, self.dec_idx] == self.dec_bin_edges[c_d] ] - else: reduced_data = data @@ -324,8 +351,14 @@ def create_angular_distributions( ) if bins.size == 0: - #self.psf_bin_edges[c_e][c_d][c_rE] = np.arange(21) - #self.psf_hists[c_e][c_d][c_rE] = np.zeros(20) + # Happens for Ereco bins with frac_counts = 0 + # create empty histograms for this specific chain of + # psf and ang_err + self.psf_bin_edges[c_e][c_d][c_rE] = np.arange(21) + self.psf_hists[c_e][c_d][c_rE] = np.zeros(20) + self.ang_err_sampling[c_e][c_d][c_rE] = [DummyPDF()] * 20 + self.ang_err_hists[c_e][c_d][c_rE] = [np.zeros(20)] * 20 + self.ang_err_bin_edges[c_e][c_d][c_rE] = [np.zeros(21)] * 20 return frac_counts = np.zeros(bins.size - 1) @@ -335,14 +368,15 @@ def create_angular_distributions( self.ang_err_bin_edges[c_e][c_d][c_rE] = [None] * (bins.size - 1) # Calculate fractional count in each PSF bin by marginalising over ang_err for c_b, b in enumerate(bins[:-1]): - indices = np.nonzero(np.isclose(b, reduced_data[:, self.psf_idx])) - frac_counts[c_b] = np.sum(reduced_data[indices, -1]) + frac_counts[c_b] = np.sum( + reduced_data[b == reduced_data[:, self.psf_idx], -1] + ) psf_reduced_data = reduced_data[reduced_data[:, self.psf_idx] == b] self._create_ang_err_distribution(c_e, c_d, c_rE, c_b, psf_reduced_data) - hist = stats.rv_histogram((frac_counts, bins), density=False) - self.psf_sampling[c_e][c_d][c_rE] = hist - self.psf_hists[c_e][c_d][c_rE] = frac_counts / (frac_counts * np.diff(bins)).sum() + # hist = stats.rv_histogram((frac_counts, bins), density=False) + # self.psf_sampling[c_e][c_d][c_rE] = hist + self.psf_hists[c_e][c_d][c_rE] = frac_counts / frac_counts.sum() if return_data: return frac_counts, bins, reduced_data return frac_counts, bins @@ -377,17 +411,17 @@ def _create_ang_err_distribution( reduced_data[:, self.ang_err_idx : self.ang_err_idx + 2].flatten() ) ) + self.ang_err_bin_edges[c_e][c_d][c_rE][c_psf] = bins frac_counts = np.zeros(bins.size - 1) for c_b, b in enumerate(bins[:-1]): - indices = np.nonzero(np.isclose(b, reduced_data[:, self.ang_err_idx])) - frac_counts[c_b] = np.sum(reduced_data[indices, -1]) - hist = stats.rv_histogram((frac_counts, bins), density=False) - self.ang_err_sampling[c_e][c_d][c_rE][c_psf] = hist - self.ang_err_hists[c_e][c_d][c_rE][c_psf] = ( - frac_counts / (frac_counts * np.diff(bins)).sum() - ) + frac_counts[c_b] = np.sum( + reduced_data[b == reduced_data[:, self.ang_err_idx], -1] + ) + # hist = stats.rv_histogram((frac_counts, bins), density=False) + # self.ang_err_sampling[c_e][c_d][c_rE][c_psf] = hist + self.ang_err_hists[c_e][c_d][c_rE][c_psf] = frac_counts / frac_counts.sum() if __name__ == "__main__": diff --git a/tests/test_irf.py b/tests/test_irf.py index e69de29..73458bb 100644 --- a/tests/test_irf.py +++ b/tests/test_irf.py @@ -0,0 +1,28 @@ +import pytest + +from icecube_data_reader.irf.irf import IceTracksDR2InstrumentResponseFunction +from icecube_data_reader.event_types import IC86 +import numpy as np +import astropy.units as u +from astropy.coordinates import SkyCoord + + +def test_eres(): + irf = IceTracksDR2InstrumentResponseFunction.load(IC86) + irf.create_eres() + + Etrue = 10**2.25 * u.GeV + et_idx = np.digitize(Etrue, irf.tE_bin_edges) - 1 + + coord = SkyCoord(ra=90 * u.deg, dec=2 * u.deg, frame="icrs") + dec_idx = np.digitize(coord.dec.deg, irf.dec_bin_edges) - 1 + + recoE = irf.sample_energy(coord, Etrue, N=100_000) + bins = irf.recoE_bin_edges[et_idx][dec_idx] + + pdf = irf.recoE_hists[et_idx][dec_idx] + + n, _ = np.histogram(recoE, bins, density=True) + cutoff = pdf >= 1e-2 + + assert np.all(pytest.approx(pdf[cutoff], abs=1e-2) == n[cutoff]) From 849b308b58f6328609872698a58728ee5cc103d0 Mon Sep 17 00:00:00 2001 From: Julian Kuhlmann Date: Mon, 1 Jun 2026 12:02:43 +0200 Subject: [PATCH 22/39] add dummy pdf --- icecube_data_reader/utils/utils.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 icecube_data_reader/utils/utils.py diff --git a/icecube_data_reader/utils/utils.py b/icecube_data_reader/utils/utils.py new file mode 100644 index 0000000..a127310 --- /dev/null +++ b/icecube_data_reader/utils/utils.py @@ -0,0 +1,21 @@ +import numpy as np + + +class DummyPDF: + def __init__(self): + pass + + def pdf(self, x): + if isinstance(x, np.ndarray): + return np.zeros_like(x) + else: + return 0.0 + + def cdf(self, x): + if isinstance(x, np.ndarray): + return np.zeros_like(x) + else: + return 0.0 + + def rvs(self, size=1, random_state=42): + raise NotImplementedError("Dummies cannot be sampled from!") From 73a83862efd9f6bc3e1487d83136fd0b8611cc4b Mon Sep 17 00:00:00 2001 From: Julian Kuhlmann Date: Tue, 2 Jun 2026 09:05:11 +0200 Subject: [PATCH 23/39] bugfix in eres storage --- icecube_data_reader/irf/irf.py | 69 +++++++++++++++++++----------- icecube_data_reader/utils/utils.py | 41 ++++++++++++++++++ 2 files changed, 84 insertions(+), 26 deletions(-) diff --git a/icecube_data_reader/irf/irf.py b/icecube_data_reader/irf/irf.py index b154632..140d88f 100644 --- a/icecube_data_reader/irf/irf.py +++ b/icecube_data_reader/irf/irf.py @@ -64,17 +64,20 @@ def __init__(self, data: np.ndarray, season: EventType): def _post_init(self): # Break naming convention because r and t are too close on the keyboard - self.recoE_bin_edges = [ - [None] * self.sin_dec_bin_centers.size - ] * self.log_tE_bin_centers.size + self.recoE_bin_edges = np.empty( + (self.log_tE_bin_centers.size, self.sin_dec_bin_centers.size), + dtype=np.ndarray, + ) # Create empty array for rv_histograms storing the energy resolution # for each bin of true energy and true declination - self.recoE_hists = [ - [None] * self.sin_dec_bin_centers.size - ] * self.log_tE_bin_centers.size - self.recoE_sampling = [ - [None] * self.sin_dec_bin_centers.size - ] * self.log_tE_bin_centers.size + self.recoE_hists = np.empty( + (self.log_tE_bin_centers.size, self.sin_dec_bin_centers.size), + dtype=np.ndarray, + ) + self.recoE_sampling = np.empty( + (self.log_tE_bin_centers.size, self.sin_dec_bin_centers.size), + dtype=stats.rv_histogram, + ) # Use lists here for possibly different numbers of bins for each ereco/psf/angerr histogram self.psf_hists = [ [[] for _ in self.sin_dec_bin_centers] for _ in self.log_tE_bin_centers @@ -120,10 +123,11 @@ def create_eres(self) -> None: for c_tE in range(self.log_tE_bin_centers.size): for c_d in range(self.sin_dec_bin_centers.size): - if isinstance( - self.recoE_sampling[c_tE][c_d], stats.rv_histogram - ) or isinstance(self.recoE_sampling[c_tE][c_d], DummyPDF): - continue + print("loop", c_tE, c_d) + # if isinstance( + # self.recoE_sampling[c_tE][c_d], stats.rv_histogram + # ) or isinstance(self.recoE_sampling[c_tE][c_d], DummyPDF): + # continue self._create_recoE_distribution(c_tE, c_d) @u.quantity_input @@ -153,6 +157,7 @@ def create_ang_res(self) -> None: self.ang_err_bin_edges[c_tE][c_d] = [None] * (bins.size - 1) self.ang_err_sampling[c_tE][c_d] = [None] * (bins.size - 1) for c_rE in range(bins.size - 1): + # Create both angular (psf and ang_err) distributions self.create_angular_distributions(c_tE, c_d, c_rE, data=ereco_data) @classmethod @@ -275,10 +280,11 @@ def _create_recoE_distribution( :rtype: tuple[np.ndarray, np.ndarray] """ + print("create eres", c_e, c_d) # Get entries at relevant true energy and declination reduced_data = self.data[ self.data[:, self.etrue_idx] == self.log_tE_bin_edges[c_e] - ] + ].copy() reduced_data = reduced_data[ reduced_data[:, self.dec_idx] == self.dec_bin_edges[c_d] ] @@ -300,7 +306,10 @@ def _create_recoE_distribution( (frac_counts, bins), density=False ) self.recoE_bin_edges[c_e][c_d] = bins - self.recoE_hists[c_e][c_d] = frac_counts / frac_counts.sum() + summed = frac_counts.sum() + if summed == 0.0: + print("ereco hist is zero", c_e, c_d) + self.recoE_hists[c_e][c_d] = frac_counts / summed if return_data: return frac_counts, bins, reduced_data @@ -332,7 +341,9 @@ def create_angular_distributions( if data is None: # Get entries at relevant true energy and declination - reduced_data = self.data[self.data[:, 0] == self.log_tE_bin_edges[c_e]] + reduced_data = self.data[ + self.data[:, self.etrue_idx] == self.log_tE_bin_edges[c_e] + ] reduced_data = reduced_data[ reduced_data[:, self.dec_idx] == self.dec_bin_edges[c_d] ] @@ -354,12 +365,12 @@ def create_angular_distributions( # Happens for Ereco bins with frac_counts = 0 # create empty histograms for this specific chain of # psf and ang_err - self.psf_bin_edges[c_e][c_d][c_rE] = np.arange(21) - self.psf_hists[c_e][c_d][c_rE] = np.zeros(20) - self.ang_err_sampling[c_e][c_d][c_rE] = [DummyPDF()] * 20 - self.ang_err_hists[c_e][c_d][c_rE] = [np.zeros(20)] * 20 - self.ang_err_bin_edges[c_e][c_d][c_rE] = [np.zeros(21)] * 20 - return + # self.psf_bin_edges[c_e][c_d][c_rE] = np.arange(21) + # self.psf_hists[c_e][c_d][c_rE] = np.zeros(20) + # self.ang_err_sampling[c_e][c_d][c_rE] = [DummyPDF()] * 20 + # self.ang_err_hists[c_e][c_d][c_rE] = [np.zeros(20)] * 20 + # self.ang_err_bin_edges[c_e][c_d][c_rE] = [np.zeros(21)] * 20 + return reduced_data frac_counts = np.zeros(bins.size - 1) self.psf_bin_edges[c_e][c_d][c_rE] = bins @@ -371,12 +382,15 @@ def create_angular_distributions( frac_counts[c_b] = np.sum( reduced_data[b == reduced_data[:, self.psf_idx], -1] ) - psf_reduced_data = reduced_data[reduced_data[:, self.psf_idx] == b] - self._create_ang_err_distribution(c_e, c_d, c_rE, c_b, psf_reduced_data) + # psf_reduced_data = reduced_data[reduced_data[:, self.psf_idx] == b] + # self._create_ang_err_distribution(c_e, c_d, c_rE, c_b, psf_reduced_data) # hist = stats.rv_histogram((frac_counts, bins), density=False) # self.psf_sampling[c_e][c_d][c_rE] = hist - self.psf_hists[c_e][c_d][c_rE] = frac_counts / frac_counts.sum() + summed = frac_counts.sum() + if summed == 0.0: + print("psf_hist is zero", c_e, c_d, c_rE) + self.psf_hists[c_e][c_d][c_rE] = frac_counts / summed if return_data: return frac_counts, bins, reduced_data return frac_counts, bins @@ -421,7 +435,10 @@ def _create_ang_err_distribution( ) # hist = stats.rv_histogram((frac_counts, bins), density=False) # self.ang_err_sampling[c_e][c_d][c_rE][c_psf] = hist - self.ang_err_hists[c_e][c_d][c_rE][c_psf] = frac_counts / frac_counts.sum() + summed = frac_counts.sum() + if summed == 0.0: + print("ang_err hist is zero", c_e, c_d, c_rE, c_psf) + self.ang_err_hists[c_e][c_d][c_rE][c_psf] = frac_counts / summed if __name__ == "__main__": diff --git a/icecube_data_reader/utils/utils.py b/icecube_data_reader/utils/utils.py index a127310..c636465 100644 --- a/icecube_data_reader/utils/utils.py +++ b/icecube_data_reader/utils/utils.py @@ -19,3 +19,44 @@ def cdf(self, x): def rvs(self, size=1, random_state=42): raise NotImplementedError("Dummies cannot be sampled from!") + + +class ddict(dict): + """ + Modified dictionary class, derived from `dict`. + Used to nest dictionaries without having to write [] all the time when adding or calling. + """ + + def __init__(self): + super().__init__() + + def add(self, value, *keys): + """ + Add value to chain of keys. + Careful: this may overwrite existing values! + :param value: Value to be added + :param keys: Tuple containing ordered keys behind which the value should be added. + """ + # TODO: protect from overwriting + + temp = self + for key in keys[:-1]: + try: + temp[key] + except KeyError: + temp[key] = {} + finally: + temp = temp[key] + temp[keys[-1]] = value + + def __call__(self, *keys): + """ + Call value of nested dicts. + :param keys: Tuple of ordered keys whose value should be returned + :return: value behind tuple of keys + """ + + temp = self + for key in keys: + temp = temp[key] + return temp From 7128819cc259e904bb425440c13182da89090de7 Mon Sep 17 00:00:00 2001 From: Julian Kuhlmann Date: Tue, 2 Jun 2026 16:29:47 +0200 Subject: [PATCH 24/39] fix docstrings --- icecube_data_reader/event_types.py | 22 ------------------- icecube_data_reader/events.py | 2 +- icecube_data_reader/irf/irf.py | 35 ++++++++++++++++++++++++------ 3 files changed, 29 insertions(+), 30 deletions(-) diff --git a/icecube_data_reader/event_types.py b/icecube_data_reader/event_types.py index 55667ba..1a24c3d 100644 --- a/icecube_data_reader/event_types.py +++ b/icecube_data_reader/event_types.py @@ -86,28 +86,6 @@ class Refrigerator: detectors = [IC40, IC59, IC79, IC86, IC86_I, IC86_II] - ''' - @classmethod - def python2dm(cls, python): - """Returns EventType corresponding to python event-type string""" - - for dm in cls.detectors: - if dm.P == python: - return dm - else: - raise ValueError(f"No detector {python} available.") - ''' - ''' - @classmethod - def stan2dm(cls, stan): - """Returns EventType corresponding to stan event-type""" - - for dm in cls.detectors: - if dm.S == stan: - return dm - else: - raise ValueError(f"No detector {stan} available.") - ''' @classmethod def int2str(cls, int_): """Returns python event-type string corresponding to integer event-type""" diff --git a/icecube_data_reader/events.py b/icecube_data_reader/events.py index 21fa077..c89e26b 100644 --- a/icecube_data_reader/events.py +++ b/icecube_data_reader/events.py @@ -321,7 +321,7 @@ def apply_energy_cut(self, Emin: u.GeV, Emax=np.inf * u.GeV) -> None: :param Emax: Maximum allowed energy, defaults to np.inf*u.GeV :type Emax: u.GeV, optional """ - mask = (self.energy >= Emin) & (self.energy <= Emax) + mask = (self.energies >= Emin) & (self.energies <= Emax) self.select(mask) @classmethod diff --git a/icecube_data_reader/irf/irf.py b/icecube_data_reader/irf/irf.py index 140d88f..4ed8c19 100644 --- a/icecube_data_reader/irf/irf.py +++ b/icecube_data_reader/irf/irf.py @@ -43,6 +43,7 @@ class IceTracksDR2InstrumentResponseFunction( InstrumentResponseFunction, EnergyResolution, AngularResolution ): + _STACK = {} def __init__(self, data: np.ndarray, season: EventType): """ DO NOT instantiate it via init, but rather through the class method `load` @@ -62,6 +63,12 @@ def __init__(self, data: np.ndarray, season: EventType): self.psf_idx = 6 self.ang_err_idx = 8 + self._eres = False + self._ang_res = False + + if season in self._STACK: + self.__dict__ = self.STACK[season].__dict__ + def _post_init(self): # Break naming convention because r and t are too close on the keyboard self.recoE_bin_edges = np.empty( @@ -121,6 +128,8 @@ def create_IRF(self) -> None: def create_eres(self) -> None: """Create energy resolution histograms""" + if self._eres: + return for c_tE in range(self.log_tE_bin_centers.size): for c_d in range(self.sin_dec_bin_centers.size): print("loop", c_tE, c_d) @@ -129,12 +138,15 @@ def create_eres(self) -> None: # ) or isinstance(self.recoE_sampling[c_tE][c_d], DummyPDF): # continue self._create_recoE_distribution(c_tE, c_d) + self._eres = True @u.quantity_input @profile def create_ang_res(self) -> None: """Create angular resolution histograms""" + if self._ang_res: + return for c_tE in range(self.log_tE_bin_centers.size): for c_d in range(self.sin_dec_bin_centers.size): if not isinstance(self.recoE_sampling[c_tE][c_d], stats.rv_histogram): @@ -160,14 +172,15 @@ def create_ang_res(self) -> None: # Create both angular (psf and ang_err) distributions self.create_angular_distributions(c_tE, c_d, c_rE, data=ereco_data) + self._ang_res = True + self._eres = True + @classmethod def load(cls, season: EventType) -> Self: """Create energy resolution object for provided season :param season: Season :type season: EventType - :param dec: Declination - :type dec: u.deg :return: Energy resolution :rtype: :py:class:`IceTrackDR2EnergyResolution` """ @@ -225,8 +238,10 @@ def sample_energy(self, coord: SkyCoord, Etrue: u.GeV, seed: int = 42, N: int = :type Etrue: u.GeV :param seed: Random seed, defaults to 42 :type seed: int, optional - :return: _description_ - :rtype: _type_ + :param N: Number of events to sample if coord and Etrue are single values, defaults to 1 + :type N: int, optional + :return: Array of reconstructed energies + :rtype: np.ndarray """ if Etrue.shape == () and N > 1: @@ -272,12 +287,16 @@ def _create_recoE_distribution( :param c_e: Index of true energy bin :type c_e: int + :param c_d: Index of declination bin + :type c_d: int + :param return_data: Set to true if the relevant entries of the smearing matrix + are to be returned additionally, defaults to False :param data: Relevant entries (i.e. for true energy, declination) of the smearing matrix, defaults to None :type return_data: bool, optional :return: Tuple of fractional counts per bin and bin edges, optional relevant entries of smearing matrix - :rtype: tuple[np.ndarray, np.ndarray] + :rtype: tuple[np.ndarray, ...] """ print("create eres", c_e, c_d) @@ -382,8 +401,8 @@ def create_angular_distributions( frac_counts[c_b] = np.sum( reduced_data[b == reduced_data[:, self.psf_idx], -1] ) - # psf_reduced_data = reduced_data[reduced_data[:, self.psf_idx] == b] - # self._create_ang_err_distribution(c_e, c_d, c_rE, c_b, psf_reduced_data) + psf_reduced_data = reduced_data[reduced_data[:, self.psf_idx] == b] + self._create_ang_err_distribution(c_e, c_d, c_rE, c_b, psf_reduced_data) # hist = stats.rv_histogram((frac_counts, bins), density=False) # self.psf_sampling[c_e][c_d][c_rE] = hist @@ -409,6 +428,8 @@ def _create_ang_err_distribution( :param c_e: Index of true energy :type c_e: int + :param c_d: Index of declination bin + :type c_d: int :param c_rE: Index of reconstructed energy :type c_rE: int :param c_psf: Index of kinematic angle/PSF From 51d64362a5ec267efe387ed6aea0689acba81cf4 Mon Sep 17 00:00:00 2001 From: Julian Kuhlmann Date: Wed, 3 Jun 2026 10:14:38 +0200 Subject: [PATCH 25/39] Make eres dec dependent, yet again --- icecube_data_reader/irf/irf.py | 102 +++++++++++++++++++-------------- 1 file changed, 58 insertions(+), 44 deletions(-) diff --git a/icecube_data_reader/irf/irf.py b/icecube_data_reader/irf/irf.py index 4ed8c19..c0eef24 100644 --- a/icecube_data_reader/irf/irf.py +++ b/icecube_data_reader/irf/irf.py @@ -10,10 +10,11 @@ from scipy import stats import astropy.units as u from astropy.coordinates import SkyCoord - +from tqdm.notebook import tqdm from icecube_data_reader.downloader import available_datasets, data_directory, I3_14 from icecube_data_reader.event_types import EventType from icecube_data_reader.utils.utils import DummyPDF +from itertools import product import logging @@ -86,24 +87,30 @@ def _post_init(self): dtype=stats.rv_histogram, ) # Use lists here for possibly different numbers of bins for each ereco/psf/angerr histogram - self.psf_hists = [ - [[] for _ in self.sin_dec_bin_centers] for _ in self.log_tE_bin_centers - ] - self.psf_bin_edges = [ - [[] for _ in self.sin_dec_bin_centers] for _ in self.log_tE_bin_centers - ] - self.psf_sampling = [ - [[] for _ in self.sin_dec_bin_centers] for _ in self.log_tE_bin_centers - ] - self.ang_err_hists = [ - [[] for _ in self.sin_dec_bin_centers] for _ in self.log_tE_bin_centers - ] - self.ang_err_bin_edges = [ - [[] for _ in self.sin_dec_bin_centers] for _ in self.log_tE_bin_centers - ] - self.ang_err_sampling = [ - [[] for _ in self.sin_dec_bin_centers] for _ in self.log_tE_bin_centers - ] + self.psf_hists = np.empty( + (self.log_tE_bin_centers.size, self.sin_dec_bin_centers.size), + dtype=np.ndarray, + ) + self.psf_bin_edges = np.empty( + (self.log_tE_bin_centers.size, self.sin_dec_bin_centers.size), + dtype=np.ndarray, + ) + self.psf_sampling = np.empty( + (self.log_tE_bin_centers.size, self.sin_dec_bin_centers.size), + dtype=np.ndarray, + ) + self.ang_err_hists = np.empty( + (self.log_tE_bin_centers.size, self.sin_dec_bin_centers.size), + dtype=np.ndarray, + ) + self.ang_err_bin_edges = np.empty( + (self.log_tE_bin_centers.size, self.sin_dec_bin_centers.size), + dtype=np.ndarray, + ) + self.ang_err_sampling = np.empty( + (self.log_tE_bin_centers.size, self.sin_dec_bin_centers.size), + dtype=np.ndarray, + ) self.faulty = [] for c_e in range(self.log_tE_bin_centers.size): @@ -125,20 +132,34 @@ def create_IRF(self) -> None: @u.quantity_input @profile - def create_eres(self) -> None: + def create_eres( + self, + dec: u.Quantity[u.deg] | None = None, + dec_range: tuple[u.Quantity[u.deg], u.Quantity[u.deg]] = (-90. * u.deg, 90. * u.deg), + show_progress: bool = True + ) -> None: """Create energy resolution histograms""" - if self._eres: - return - for c_tE in range(self.log_tE_bin_centers.size): - for c_d in range(self.sin_dec_bin_centers.size): - print("loop", c_tE, c_d) - # if isinstance( - # self.recoE_sampling[c_tE][c_d], stats.rv_histogram - # ) or isinstance(self.recoE_sampling[c_tE][c_d], DummyPDF): - # continue - self._create_recoE_distribution(c_tE, c_d) - self._eres = True + if dec is not None: + dec_idx = np.digitize(dec.to_value(u.deg), self.dec_bin_edges) - 1 + dec_range = range(dec_idx, dec_idx + 1) + dec_size = 1 + else: + dec_min, dec_max = dec_range + dec_min_idx = np.digitize(dec_min.to_value(u.deg), self.dec_bin_edges) - 1 + dec_max_idx = np.digitize(dec_max.to_value(u.deg), self.dec_bin_edges) - 1 + print(dec_min_idx, dec_max_idx) + dec_idx = (dec_min_idx, dec_max_idx) + dec_range = range(dec_min_idx, dec_max_idx) + dec_size = dec_max_idx - dec_min_idx + print(dec_size) + + for c_d, c_tE in tqdm(product(dec_range, range(self.log_tE_bin_centers.size)), disable=not show_progress, desc="Energy resolution", total=self.log_tE_bin_centers.size * dec_size): + if isinstance( + self.recoE_sampling[c_tE][c_d], stats.rv_histogram + ) or isinstance(self.recoE_sampling[c_tE][c_d], DummyPDF): + continue + self._create_recoE_distribution(c_tE, c_d) @u.quantity_input @profile @@ -161,13 +182,13 @@ def create_ang_res(self) -> None: ereco_data = None bins = self.recoE_bin_edges[c_tE][c_d] # bins is reco energy, each bin is mapped to a PSF distribution - self.psf_hists[c_tE][c_d] = [None] * (bins.size - 1) - self.psf_bin_edges[c_tE][c_d] = [None] * (bins.size - 1) - self.psf_sampling[c_tE][c_d] = [None] * (bins.size - 1) + self.psf_hists[c_tE][c_d] = [[] for _ in range(bins.size)] + self.psf_bin_edges[c_tE][c_d] = [[] for _ in range(bins.size)] + self.psf_sampling[c_tE][c_d] = [[] for _ in range(bins.size)] - self.ang_err_hists[c_tE][c_d] = [None] * (bins.size - 1) - self.ang_err_bin_edges[c_tE][c_d] = [None] * (bins.size - 1) - self.ang_err_sampling[c_tE][c_d] = [None] * (bins.size - 1) + self.ang_err_hists[c_tE][c_d] = [[] for _ in range(bins.size)] + self.ang_err_bin_edges[c_tE][c_d] = [[] for _ in range(bins.size)] + self.ang_err_sampling[c_tE][c_d] = [[] for _ in range(bins.size)] for c_rE in range(bins.size - 1): # Create both angular (psf and ang_err) distributions self.create_angular_distributions(c_tE, c_d, c_rE, data=ereco_data) @@ -299,7 +320,6 @@ def _create_recoE_distribution( :rtype: tuple[np.ndarray, ...] """ - print("create eres", c_e, c_d) # Get entries at relevant true energy and declination reduced_data = self.data[ self.data[:, self.etrue_idx] == self.log_tE_bin_edges[c_e] @@ -326,8 +346,6 @@ def _create_recoE_distribution( ) self.recoE_bin_edges[c_e][c_d] = bins summed = frac_counts.sum() - if summed == 0.0: - print("ereco hist is zero", c_e, c_d) self.recoE_hists[c_e][c_d] = frac_counts / summed if return_data: @@ -407,8 +425,6 @@ def create_angular_distributions( # hist = stats.rv_histogram((frac_counts, bins), density=False) # self.psf_sampling[c_e][c_d][c_rE] = hist summed = frac_counts.sum() - if summed == 0.0: - print("psf_hist is zero", c_e, c_d, c_rE) self.psf_hists[c_e][c_d][c_rE] = frac_counts / summed if return_data: return frac_counts, bins, reduced_data @@ -457,8 +473,6 @@ def _create_ang_err_distribution( # hist = stats.rv_histogram((frac_counts, bins), density=False) # self.ang_err_sampling[c_e][c_d][c_rE][c_psf] = hist summed = frac_counts.sum() - if summed == 0.0: - print("ang_err hist is zero", c_e, c_d, c_rE, c_psf) self.ang_err_hists[c_e][c_d][c_rE][c_psf] = frac_counts / summed From 59a7848afb9b161721b3d061c666cda6c3e28ad5 Mon Sep 17 00:00:00 2001 From: Julian Kuhlmann Date: Wed, 3 Jun 2026 12:56:42 +0200 Subject: [PATCH 26/39] simplify ang err creation --- icecube_data_reader/irf/irf.py | 108 +++++++++++++++++++++++---------- 1 file changed, 75 insertions(+), 33 deletions(-) diff --git a/icecube_data_reader/irf/irf.py b/icecube_data_reader/irf/irf.py index c0eef24..a544bb0 100644 --- a/icecube_data_reader/irf/irf.py +++ b/icecube_data_reader/irf/irf.py @@ -148,13 +148,18 @@ def create_eres( dec_min, dec_max = dec_range dec_min_idx = np.digitize(dec_min.to_value(u.deg), self.dec_bin_edges) - 1 dec_max_idx = np.digitize(dec_max.to_value(u.deg), self.dec_bin_edges) - 1 - print(dec_min_idx, dec_max_idx) dec_idx = (dec_min_idx, dec_max_idx) dec_range = range(dec_min_idx, dec_max_idx) dec_size = dec_max_idx - dec_min_idx - print(dec_size) - for c_d, c_tE in tqdm(product(dec_range, range(self.log_tE_bin_centers.size)), disable=not show_progress, desc="Energy resolution", total=self.log_tE_bin_centers.size * dec_size): + for c_d, c_tE in tqdm( + product( + dec_range, range(self.log_tE_bin_centers.size) + ), + disable=not show_progress, + desc="Energy resolution", + total=self.log_tE_bin_centers.size * dec_size + ): if isinstance( self.recoE_sampling[c_tE][c_d], stats.rv_histogram ) or isinstance(self.recoE_sampling[c_tE][c_d], DummyPDF): @@ -163,38 +168,68 @@ def create_eres( @u.quantity_input @profile - def create_ang_res(self) -> None: + def create_ang_res(self, + dec: u.Quantity[u.deg] | None = None, + dec_range: tuple[u.Quantity[u.deg], u.Quantity[u.deg]] = (-90. * u.deg, 90. * u.deg), + show_progress: bool = True + ) -> None: """Create angular resolution histograms""" - if self._ang_res: - return - for c_tE in range(self.log_tE_bin_centers.size): - for c_d in range(self.sin_dec_bin_centers.size): - if not isinstance(self.recoE_sampling[c_tE][c_d], stats.rv_histogram): - frac_counts, bins, ereco_data = self._create_recoE_distribution( - c_tE, c_d, return_data=True - ) - self.recoE_sampling[c_tE][c_d] = stats.rv_histogram( - (frac_counts, bins), density=False - ) - self.recoE_bin_edges[c_tE][c_d] = bins - else: - ereco_data = None - bins = self.recoE_bin_edges[c_tE][c_d] - # bins is reco energy, each bin is mapped to a PSF distribution - self.psf_hists[c_tE][c_d] = [[] for _ in range(bins.size)] - self.psf_bin_edges[c_tE][c_d] = [[] for _ in range(bins.size)] - self.psf_sampling[c_tE][c_d] = [[] for _ in range(bins.size)] - - self.ang_err_hists[c_tE][c_d] = [[] for _ in range(bins.size)] - self.ang_err_bin_edges[c_tE][c_d] = [[] for _ in range(bins.size)] - self.ang_err_sampling[c_tE][c_d] = [[] for _ in range(bins.size)] - for c_rE in range(bins.size - 1): - # Create both angular (psf and ang_err) distributions - self.create_angular_distributions(c_tE, c_d, c_rE, data=ereco_data) - - self._ang_res = True - self._eres = True + if dec is not None: + dec_idx = np.digitize(dec.to_value(u.deg), self.dec_bin_edges) - 1 + dec_range = range(dec_idx, dec_idx + 1) + dec_size = 1 + else: + dec_min, dec_max = dec_range + dec_min_idx = np.digitize(dec_min.to_value(u.deg), self.dec_bin_edges) - 1 + dec_max_idx = np.digitize(dec_max.to_value(u.deg), self.dec_bin_edges) - 1 + dec_idx = (dec_min_idx, dec_max_idx) + dec_range = range(dec_min_idx, dec_max_idx) + dec_size = dec_max_idx - dec_min_idx + + self.ang_err_bin_edges = np.zeros( + (self.log_tE_bin_centers.size, self.sin_dec_bin_centers.size, 21), + ) + self.ang_err_hists = np.zeros( + (self.log_tE_bin_centers.size, self.sin_dec_bin_centers.size, 20, 20), + ) + for c_d, c_tE in tqdm( + product( + dec_range, range(self.log_tE_bin_centers.size) + ), + disable=not show_progress, + desc="Angular resolution", + total=self.log_tE_bin_centers.size * dec_size + ): + if isinstance( + self.recoE_sampling[c_tE][c_d], stats.rv_histogram + ) or isinstance(self.recoE_sampling[c_tE][c_d], DummyPDF): + etrue_data = self.data[self.data[:, self.etrue_idx] == self.log_tE_bin_edges[c_tE]] + data = etrue_data[etrue_data[:, self.dec_idx] == self.dec_bin_edges[c_d]] + _, _, data = self._create_recoE_distribution(c_tE, c_d, return_data=True) + + all_ang_err_bins = np.empty((self.recoE_bin_edges[c_tE, c_d].size - 1,), dtype=np.ndarray) + for c_rE, rE in enumerate(self.recoE_bin_edges[c_tE, c_d][:-1]): + # self.ang_err_hists[c_tE, c_d] = np.empty((self.recoE_hists[c_tE, c_d].size), dtype=np.ndarray) + red_data = data[data[:, self.ereco_idx] == rE] + all_ang_err_bins[c_rE] = np.unique(red_data[:, self.ang_err_idx:self.ang_err_idx+2].flatten()) + ang_err_counts = np.zeros(all_ang_err_bins[c_rE].size - 1) + for c_ar, ar in enumerate(all_ang_err_bins[c_rE][:-1]): + ang_err_counts[c_ar] = red_data[red_data[:, self.ang_err_idx] == ar, -1].sum() + self.ang_err_hists[c_tE, c_d, c_rE] = ang_err_counts.copy() + for low, high in pairwise(all_ang_err_bins): + if not np.all(low == high): + # Has been tested in a notebook, should be fine! + raise AssertionError("Not all ang_err bins are the same! Investigate manually and fix me.") + + self.ang_err_bin_edges[c_tE, c_d] = all_ang_err_bins[0].copy() + + + + + + + @classmethod def load(cls, season: EventType) -> Self: @@ -320,6 +355,13 @@ def _create_recoE_distribution( :rtype: tuple[np.ndarray, ...] """ + if (c_e, c_d) in self.faulty: + self.recoE_bin_edges[c_e, c_d] = np.arange(21) + self.recoE_hists[c_e, c_d] = np.zeros(20) + self.recoE_sampling[c_e, c_d] = DummyPDF() + if return_data: + return None, None, None + return None, None # Get entries at relevant true energy and declination reduced_data = self.data[ self.data[:, self.etrue_idx] == self.log_tE_bin_edges[c_e] From 096ebc438952724183a3af2042f03ace91a67e76 Mon Sep 17 00:00:00 2001 From: Julian Kuhlmann Date: Wed, 3 Jun 2026 13:02:10 +0200 Subject: [PATCH 27/39] update doc strings --- icecube_data_reader/irf/irf.py | 55 ++++++++++++++++++++++++++++++---- 1 file changed, 50 insertions(+), 5 deletions(-) diff --git a/icecube_data_reader/irf/irf.py b/icecube_data_reader/irf/irf.py index a544bb0..39a5a45 100644 --- a/icecube_data_reader/irf/irf.py +++ b/icecube_data_reader/irf/irf.py @@ -125,10 +125,26 @@ def _post_init(self): @u.quantity_input @profile - def create_IRF(self) -> None: - """Create entire chain of IRF distributions at provided declination.""" + def create_IRF( + self, + dec: u.Quantity[u.deg] | None = None, + dec_range: tuple[u.Quantity[u.deg], u.Quantity[u.deg]] = (-90. * u.deg, 90. * u.deg), + show_progress: bool = True + ) -> None: + """Create the entire IRF, i.e. energy and angular resolution. - self.create_ang_res() + Selection by declination or declination range is possible. A single value of `dec` takes precedent + over a provided range. + + :param dec: Declination, defaults to None + :type dec: u.Quantity[u.deg] | None, optional + :param dec_range: Declination range, defaults to (-90. * u.deg, 90. * u.deg) + :type dec_range: tuple[u.Quantity[u.deg], u.Quantity[u.deg]], optional + :param show_progress: Display progress bar, defaults to True + :type show_progress: bool, optional + """ + + self.create_ang_res(dec, dec_range, show_progress) @u.quantity_input @profile @@ -138,7 +154,18 @@ def create_eres( dec_range: tuple[u.Quantity[u.deg], u.Quantity[u.deg]] = (-90. * u.deg, 90. * u.deg), show_progress: bool = True ) -> None: - """Create energy resolution histograms""" + """Create the energy resolution. + + Selection by declination or declination range is possible. A single value of `dec` takes precedent + over a provided range. + + :param dec: Declination, defaults to None + :type dec: u.Quantity[u.deg] | None, optional + :param dec_range: Declination range, defaults to (-90. * u.deg, 90. * u.deg) + :type dec_range: tuple[u.Quantity[u.deg], u.Quantity[u.deg]], optional + :param show_progress: Display progress bar, defaults to True + :type show_progress: bool, optional + """ if dec is not None: dec_idx = np.digitize(dec.to_value(u.deg), self.dec_bin_edges) - 1 @@ -173,7 +200,25 @@ def create_ang_res(self, dec_range: tuple[u.Quantity[u.deg], u.Quantity[u.deg]] = (-90. * u.deg, 90. * u.deg), show_progress: bool = True ) -> None: - """Create angular resolution histograms""" + """Create angular resolution distributions. + The intermediate step of kinematic angle / PSF is irrelevant, + as it is not an observable. We skip this explicit simulation step by marginalising over it. + For each Etrue, DEC pair, the binning of ang_err is fixed. Hence we collect the + ang_err_bin_edges and find the fractional counts only for specific values of reconstructed energy. + If the required energy resolution has not been created, it will be done automatically. + + Selection by declination or declination range is possible. A single value of `dec` takes precedent + over a provided range. + + :param dec: Declination, defaults to None + :type dec: u.Quantity[u.deg] | None, optional + :param dec_range: Declination range, defaults to (-90. * u.deg, 90. * u.deg) + :type dec_range: tuple[u.Quantity[u.deg], u.Quantity[u.deg]], optional + :param show_progress: Display progress bar, defaults to True + :type show_progress: bool, optional + :raises AssertionError: If not all ang_err_bin_edges within one pair of Etrue and DEC + are the same, an AssertionError is raised + """ if dec is not None: dec_idx = np.digitize(dec.to_value(u.deg), self.dec_bin_edges) - 1 From ac0181c99cbee905bfea84ae83f79e705e5cf513 Mon Sep 17 00:00:00 2001 From: Julian Kuhlmann Date: Wed, 3 Jun 2026 15:55:20 +0200 Subject: [PATCH 28/39] convert frac counts to normalised pdfs --- icecube_data_reader/irf/irf.py | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/icecube_data_reader/irf/irf.py b/icecube_data_reader/irf/irf.py index 39a5a45..a4c3629 100644 --- a/icecube_data_reader/irf/irf.py +++ b/icecube_data_reader/irf/irf.py @@ -261,20 +261,14 @@ def create_ang_res(self, ang_err_counts = np.zeros(all_ang_err_bins[c_rE].size - 1) for c_ar, ar in enumerate(all_ang_err_bins[c_rE][:-1]): ang_err_counts[c_ar] = red_data[red_data[:, self.ang_err_idx] == ar, -1].sum() - self.ang_err_hists[c_tE, c_d, c_rE] = ang_err_counts.copy() + hist = ang_err_counts.copy() + self.ang_err_hists[c_tE, c_d, c_rE] = hist / (hist * np.diff(all_ang_err_bins[c_rE])).sum() for low, high in pairwise(all_ang_err_bins): if not np.all(low == high): # Has been tested in a notebook, should be fine! raise AssertionError("Not all ang_err bins are the same! Investigate manually and fix me.") - self.ang_err_bin_edges[c_tE, c_d] = all_ang_err_bins[0].copy() - - - - - - - + self.ang_err_bin_edges[c_tE, c_d] = all_ang_err_bins[0].copy() @classmethod def load(cls, season: EventType) -> Self: @@ -432,8 +426,8 @@ def _create_recoE_distribution( (frac_counts, bins), density=False ) self.recoE_bin_edges[c_e][c_d] = bins - summed = frac_counts.sum() - self.recoE_hists[c_e][c_d] = frac_counts / summed + # summed = frac_counts.sum() + self.recoE_hists[c_e][c_d] = frac_counts / (frac_counts * np.diff(bins)).sum() if return_data: return frac_counts, bins, reduced_data @@ -463,6 +457,7 @@ def create_angular_distributions( :rtype: tuple[np.ndarray, ...] """ + raise NotImplementedError() if data is None: # Get entries at relevant true energy and declination reduced_data = self.data[ @@ -543,6 +538,7 @@ def _create_ang_err_distribution( :rtype: tuple[np.ndarray, ...] """ + raise NotImplementedError() # Reduced data by etrue, dec, ereco, psf bins = np.sort( np.unique( From 57c80c2849d603cd35029dd0a7b7a2291cb90c6a Mon Sep 17 00:00:00 2001 From: Julian Kuhlmann Date: Wed, 3 Jun 2026 15:56:03 +0200 Subject: [PATCH 29/39] removed obsolete methods --- icecube_data_reader/irf/irf.py | 135 --------------------------------- 1 file changed, 135 deletions(-) diff --git a/icecube_data_reader/irf/irf.py b/icecube_data_reader/irf/irf.py index a4c3629..1581b59 100644 --- a/icecube_data_reader/irf/irf.py +++ b/icecube_data_reader/irf/irf.py @@ -299,12 +299,6 @@ def load(cls, season: EventType) -> Self: # use log binning for angular quantities data[:, 6:-1] = np.log10(data[:, 6:-1]) - - # dec_idx = np.digitize(dec.to_value(u.deg), dec_bin_edges) - 1 - # data = data[data[:, 2] == dec_bin_edges[dec_idx]] - # Remove declination bc we only use one declination bin - # mask = [True, True, False, False, True, True, True, True, True, True, True] - # data = data[:, mask] irf = cls(data, season) irf.log_tE_bin_edges = log_tE_bin_edges irf.log_tE_bin_centers = log_tE_bin_centers @@ -314,9 +308,6 @@ def load(cls, season: EventType) -> Self: irf.sin_dec_bin_centers = ( irf.sin_dec_bin_edges[:-1] + irf.sin_dec_bin_edges[1:] ) / 2 - # irf.dec_idx = dec_idx - # irf.dec_min = dec_bin_edges[dec_idx] * u.deg - # irf.dec_max = dec_bin_edges[dec_idx + 1] * u.deg irf._post_init() @@ -433,132 +424,6 @@ def _create_recoE_distribution( return frac_counts, bins, reduced_data return frac_counts, bins - @profile - def create_angular_distributions( - self, - c_e: int, - c_d: int, - c_rE: int, - data: None | np.ndarray = None, - return_data: bool = False, - ) -> tuple[np.ndarray, ...]: - """Creates PSF distribution for provided indices of preceeding histograms - by marginalising over the angular error. - - :param c_e: Index of true energy bin - :type c_e: int - :param c_rE: Index of reconstructed energy bin - :type c_rE: int - :param data: Relevant entries (i.e. for true energy, declination) - of the smearing matrix, defaults to None - :type data: None | np.ndarray, optional - :return: Tuple of fractional counts ber bin and bin edges, optional relevant data - of smearing matrix - :rtype: tuple[np.ndarray, ...] - """ - - raise NotImplementedError() - if data is None: - # Get entries at relevant true energy and declination - reduced_data = self.data[ - self.data[:, self.etrue_idx] == self.log_tE_bin_edges[c_e] - ] - reduced_data = reduced_data[ - reduced_data[:, self.dec_idx] == self.dec_bin_edges[c_d] - ] - - else: - reduced_data = data - - # Get entries at relevant reco energy - reduced_data = reduced_data[ - reduced_data[:, self.ereco_idx] == self.recoE_bin_edges[c_e][c_d][c_rE] - ] - - # Create bin edges of kinematic angle / PSF - bins = np.sort( - np.unique(reduced_data[:, self.psf_idx : self.psf_idx + 2].flatten()) - ) - - if bins.size == 0: - # Happens for Ereco bins with frac_counts = 0 - # create empty histograms for this specific chain of - # psf and ang_err - # self.psf_bin_edges[c_e][c_d][c_rE] = np.arange(21) - # self.psf_hists[c_e][c_d][c_rE] = np.zeros(20) - # self.ang_err_sampling[c_e][c_d][c_rE] = [DummyPDF()] * 20 - # self.ang_err_hists[c_e][c_d][c_rE] = [np.zeros(20)] * 20 - # self.ang_err_bin_edges[c_e][c_d][c_rE] = [np.zeros(21)] * 20 - return reduced_data - - frac_counts = np.zeros(bins.size - 1) - self.psf_bin_edges[c_e][c_d][c_rE] = bins - self.ang_err_sampling[c_e][c_d][c_rE] = [None] * (bins.size - 1) - self.ang_err_hists[c_e][c_d][c_rE] = [None] * (bins.size - 1) - self.ang_err_bin_edges[c_e][c_d][c_rE] = [None] * (bins.size - 1) - # Calculate fractional count in each PSF bin by marginalising over ang_err - for c_b, b in enumerate(bins[:-1]): - frac_counts[c_b] = np.sum( - reduced_data[b == reduced_data[:, self.psf_idx], -1] - ) - psf_reduced_data = reduced_data[reduced_data[:, self.psf_idx] == b] - self._create_ang_err_distribution(c_e, c_d, c_rE, c_b, psf_reduced_data) - - # hist = stats.rv_histogram((frac_counts, bins), density=False) - # self.psf_sampling[c_e][c_d][c_rE] = hist - summed = frac_counts.sum() - self.psf_hists[c_e][c_d][c_rE] = frac_counts / summed - if return_data: - return frac_counts, bins, reduced_data - return frac_counts, bins - - @profile - def _create_ang_err_distribution( - self, - c_e: int, - c_d: int, - c_rE: int, - c_psf: int, - reduced_data: np.ndarray, - ) -> tuple[np.ndarray, ...]: - """Create angular error distribution for provided true energy, - reco energy and kinematic angle indices. - - :param c_e: Index of true energy - :type c_e: int - :param c_d: Index of declination bin - :type c_d: int - :param c_rE: Index of reconstructed energy - :type c_rE: int - :param c_psf: Index of kinematic angle/PSF - :type c_psf: int - :param reduced_data: Relevant entries of the smearing matrix - :type reduced_data: np.ndarray - :return: _description_ - :rtype: tuple[np.ndarray, ...] - """ - - raise NotImplementedError() - # Reduced data by etrue, dec, ereco, psf - bins = np.sort( - np.unique( - reduced_data[:, self.ang_err_idx : self.ang_err_idx + 2].flatten() - ) - ) - - self.ang_err_bin_edges[c_e][c_d][c_rE][c_psf] = bins - - frac_counts = np.zeros(bins.size - 1) - for c_b, b in enumerate(bins[:-1]): - frac_counts[c_b] = np.sum( - reduced_data[b == reduced_data[:, self.ang_err_idx], -1] - ) - # hist = stats.rv_histogram((frac_counts, bins), density=False) - # self.ang_err_sampling[c_e][c_d][c_rE][c_psf] = hist - summed = frac_counts.sum() - self.ang_err_hists[c_e][c_d][c_rE][c_psf] = frac_counts / summed - - if __name__ == "__main__": from icecube_data_reader.event_types import IC86 From bf3df2c60844199719626c18c7c42b85aba26621 Mon Sep 17 00:00:00 2001 From: Julian Kuhlmann Date: Wed, 3 Jun 2026 16:43:58 +0200 Subject: [PATCH 30/39] samples ang_err --- icecube_data_reader/irf/irf.py | 92 ++++++++++++++++++++++++++++------ 1 file changed, 78 insertions(+), 14 deletions(-) diff --git a/icecube_data_reader/irf/irf.py b/icecube_data_reader/irf/irf.py index 1581b59..4829cb3 100644 --- a/icecube_data_reader/irf/irf.py +++ b/icecube_data_reader/irf/irf.py @@ -14,6 +14,7 @@ from icecube_data_reader.downloader import available_datasets, data_directory, I3_14 from icecube_data_reader.event_types import EventType from icecube_data_reader.utils.utils import DummyPDF +from icecube_data_reader.events import Events from itertools import product import logging @@ -22,9 +23,6 @@ logger.setLevel(logging.DEBUG) -from line_profiler import profile - - class InstrumentResponseFunction(ABC): @classmethod @abstractmethod @@ -124,7 +122,6 @@ def _post_init(self): self.faulty.append((c_e, c_d)) @u.quantity_input - @profile def create_IRF( self, dec: u.Quantity[u.deg] | None = None, @@ -147,7 +144,6 @@ def create_IRF( self.create_ang_res(dec, dec_range, show_progress) @u.quantity_input - @profile def create_eres( self, dec: u.Quantity[u.deg] | None = None, @@ -194,7 +190,6 @@ def create_eres( self._create_recoE_distribution(c_tE, c_d) @u.quantity_input - @profile def create_ang_res(self, dec: u.Quantity[u.deg] | None = None, dec_range: tuple[u.Quantity[u.deg], u.Quantity[u.deg]] = (-90. * u.deg, 90. * u.deg), @@ -314,8 +309,14 @@ def load(cls, season: EventType) -> Self: return irf @u.quantity_input - def sample_energy(self, coord: SkyCoord, Etrue: u.GeV, seed: int = 42, N: int = 1): - """Simulate events + def sample_energy( + self, + coord: SkyCoord, + Etrue: u.GeV, + seed: int = 42, + N: int = 1, + ) -> tuple[np.ndarray, int, np.ndarray]: + """Sample reco energy :param coord: Source coordinate, assumes only one coordinate per function call @@ -330,17 +331,81 @@ def sample_energy(self, coord: SkyCoord, Etrue: u.GeV, seed: int = 42, N: int = :rtype: np.ndarray """ + return self._sample_energy(coord, Etrue, seed, N) + + @u.quantity_input + def sample(self, coord: SkyCoord, Etrue: u.GeV, seed: int = 42, N: int = 1) -> Events: + """Sample reco energy + + :param coord: Source coordinate, + assumes only one coordinate per function call + :type coord: SkyCoord + :param Etrue: True neutrino energy + :type Etrue: u.GeV + :param seed: Random seed, defaults to 42 + :type seed: int, optional + :param N: Number of events to sample if coord and Etrue are single values, defaults to 1 + :type N: int, optional + :return: + :rtype: np.ndarray + """ + + tE_idx, c_d, recoE = self._sample_energy(coord, Etrue, seed, N) + set_e = np.unique(tE_idx) + + recoE = np.atleast_1d(recoE) + ang_errs_out = np.zeros(recoE.size) + + for idx_e in set_e: + _index_e = np.atleast_1d(np.argwhere(idx_e == tE_idx).squeeze()) + print(_index_e) + reco_idxs = np.digitize(recoE[_index_e], self.recoE_bin_edges[idx_e, c_d]) - 1 + set_rE = np.unique(reco_idxs) + for idx_rE in set_rE: + random = stats.rv_histogram( + (self.ang_err_hists[idx_e, c_d, idx_rE], self.ang_err_bin_edges[idx_e, c_d]), + density=True + ) + _index_rE = np.atleast_1d(np.argwhere(idx_rE == reco_idxs).squeeze()) + #print(_index_rE) + rvs = random.rvs(size = _index_rE.size) + #print(rvs) + ang_errs_out[_index_e[_index_rE]] = rvs + #print(idx_e, idx_rE) + + return ang_errs_out + + def _sample_energy( + self, + coord: SkyCoord, + Etrue: u.GeV, + seed: int = 42, + N: int = 1 + ) -> tuple[np.ndarray, int, np.ndarray]: + """Sample reco energy of events + + :param coord: Source coordinate, + assumes only one coordinate per function call + :type coord: SkyCoord + :param Etrue: True neutrino energy + :type Etrue: u.GeV + :param seed: Random seed, defaults to 42 + :type seed: int, optional + :param N: Number of events to sample if coord and Etrue are single values, defaults to 1 + :type N: int, optional + :return: Etrue indices, declination index, array of reconstructed energies + :rtype: tuple[np.ndarray, int, np.ndarray] + """ + if Etrue.shape == () and N > 1: Etrue = np.full(N, Etrue.to_value(u.GeV)) else: Etrue = np.atleast_1d(Etrue.to_value(u.GeV)) coord.representation_type = "spherical" - ra = coord.ra dec = coord.dec c_d = np.digitize(dec.deg, self.dec_bin_edges) - 1 coord.representation_type = "cartesian" - unit_vector = np.array([coord.x, coord.y, coord.z]) coord.representation_type = "spherical" log_tE = np.log10(Etrue) @@ -350,17 +415,16 @@ def sample_energy(self, coord: SkyCoord, Etrue: u.GeV, seed: int = 42, N: int = set_e = np.unique(tE_idx) for idx_e in set_e: - _index_e = np.argwhere(idx_e == tE_idx).squeeze() + _index_e = np.atleast_1d(np.argwhere(idx_e == tE_idx).squeeze()) recoE = self.recoE_sampling[idx_e][c_d].rvs( size=_index_e.size, random_state=seed ) recoE_out[_index_e] = recoE if recoE_out.size == 1: - return recoE_out[0] - return recoE_out + return tE_idx, c_d, recoE_out[0] + return tE_idx, c_d, recoE_out - @profile def _create_recoE_distribution( self, c_e: int, From 46a4b67ce6e1f88cb2536505afdd41d6c06877bb Mon Sep 17 00:00:00 2001 From: Julian Kuhlmann Date: Wed, 3 Jun 2026 17:09:07 +0200 Subject: [PATCH 31/39] add methods to deflect events --- icecube_data_reader/irf/irf.py | 34 +++++++++++++++++++++++++----- icecube_data_reader/utils/utils.py | 19 ----------------- 2 files changed, 29 insertions(+), 24 deletions(-) diff --git a/icecube_data_reader/irf/irf.py b/icecube_data_reader/irf/irf.py index 4829cb3..8baca21 100644 --- a/icecube_data_reader/irf/irf.py +++ b/icecube_data_reader/irf/irf.py @@ -358,7 +358,6 @@ def sample(self, coord: SkyCoord, Etrue: u.GeV, seed: int = 42, N: int = 1) -> E for idx_e in set_e: _index_e = np.atleast_1d(np.argwhere(idx_e == tE_idx).squeeze()) - print(_index_e) reco_idxs = np.digitize(recoE[_index_e], self.recoE_bin_edges[idx_e, c_d]) - 1 set_rE = np.unique(reco_idxs) for idx_rE in set_rE: @@ -367,13 +366,38 @@ def sample(self, coord: SkyCoord, Etrue: u.GeV, seed: int = 42, N: int = 1) -> E density=True ) _index_rE = np.atleast_1d(np.argwhere(idx_rE == reco_idxs).squeeze()) - #print(_index_rE) rvs = random.rvs(size = _index_rE.size) - #print(rvs) - ang_errs_out[_index_e[_index_rE]] = rvs - #print(idx_e, idx_rE) + ang_errs_out[_index_e[_index_rE]] = np.deg2rad(np.power(10, rvs)) + + + deflection_angles = stats.rayleigh.rvs(scale=ang_errs_out) + # Deflecte like we do in stan: sample rotation axis orthonormal to the event + # sample angle `theta` by which we rotate from Rayleigh dist with sampled ang_err as its sigma + # rotate the initial direction/coord by `theta` around rotation axis + return ang_errs_out + + def _sample_orthonormal(self, x: np.ndarray) -> np.ndarray: + """Sample a vector that is orthonormal to the input + + :param x: Input array + :type x: np.ndarray + :return: Vector orthonormal to input + :rtype: np.ndarray + """ + v = stats.norm().rvs(size=3) + projected = x * np.dot(x, v) / np.linalg.norm(x) + ortho = v - projected + orthonormal = ortho / np.linalg.norm(ortho) + return orthonormal + + def _rotate_around_vector(self, rotatee: np.ndarray, axis: np.ndarray, theta: u.rad) -> np.ndarray: + """Rotate a vector around a second vector by an angle + """ + + theta = theta.to_value(u.rad) + return axis * np.dot(rotatee, axis) + np.cos(theta) * np.cross(np.cross(axis, rotatee), axis) + np.sin(theta) * np.cross(axis, rotatee) def _sample_energy( self, diff --git a/icecube_data_reader/utils/utils.py b/icecube_data_reader/utils/utils.py index c636465..ed9bdc7 100644 --- a/icecube_data_reader/utils/utils.py +++ b/icecube_data_reader/utils/utils.py @@ -1,25 +1,6 @@ import numpy as np -class DummyPDF: - def __init__(self): - pass - - def pdf(self, x): - if isinstance(x, np.ndarray): - return np.zeros_like(x) - else: - return 0.0 - - def cdf(self, x): - if isinstance(x, np.ndarray): - return np.zeros_like(x) - else: - return 0.0 - - def rvs(self, size=1, random_state=42): - raise NotImplementedError("Dummies cannot be sampled from!") - class ddict(dict): """ From 58c52de37c234c2998fc2d487f3c1027b05f642d Mon Sep 17 00:00:00 2001 From: Julian Kuhlmann Date: Thu, 4 Jun 2026 21:12:50 +0200 Subject: [PATCH 32/39] WIP --- icecube_data_reader/irf/irf.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/icecube_data_reader/irf/irf.py b/icecube_data_reader/irf/irf.py index 8baca21..f8d2faf 100644 --- a/icecube_data_reader/irf/irf.py +++ b/icecube_data_reader/irf/irf.py @@ -370,11 +370,19 @@ def sample(self, coord: SkyCoord, Etrue: u.GeV, seed: int = 42, N: int = 1) -> E ang_errs_out[_index_e[_index_rE]] = np.deg2rad(np.power(10, rvs)) - deflection_angles = stats.rayleigh.rvs(scale=ang_errs_out) + deflection_angles = stats.rayleigh.rvs(scale=ang_errs_out) * u.rad # Deflecte like we do in stan: sample rotation axis orthonormal to the event # sample angle `theta` by which we rotate from Rayleigh dist with sampled ang_err as its sigma # rotate the initial direction/coord by `theta` around rotation axis + coord.representation_type = "cartesian" + direction = np.array([coord.x, coord.y, coord.z]) + + rot_axis = self._sample_orthonormal(direction) + rotated = self._rotate_around_vector(direction, rot_axis, angle) + + + return ang_errs_out @@ -386,12 +394,13 @@ def _sample_orthonormal(self, x: np.ndarray) -> np.ndarray: :return: Vector orthonormal to input :rtype: np.ndarray """ - v = stats.norm().rvs(size=3) - projected = x * np.dot(x, v) / np.linalg.norm(x) + v = stats.norm().rvs(size=x.shape) + projected = x * np.vecdot(x, v) / np.linalg.norm(x) ortho = v - projected orthonormal = ortho / np.linalg.norm(ortho) return orthonormal + @u.quantity_input def _rotate_around_vector(self, rotatee: np.ndarray, axis: np.ndarray, theta: u.rad) -> np.ndarray: """Rotate a vector around a second vector by an angle """ From b079b6ddafa34fcbad7fd66ee25180922d4787d7 Mon Sep 17 00:00:00 2001 From: Julian Kuhlmann Date: Fri, 5 Jun 2026 09:43:49 +0200 Subject: [PATCH 33/39] change return of sample_energy --- icecube_data_reader/irf/irf.py | 131 +++++++++++++++++------------ icecube_data_reader/utils/utils.py | 19 +++++ 2 files changed, 98 insertions(+), 52 deletions(-) diff --git a/icecube_data_reader/irf/irf.py b/icecube_data_reader/irf/irf.py index f8d2faf..2ddf2f3 100644 --- a/icecube_data_reader/irf/irf.py +++ b/icecube_data_reader/irf/irf.py @@ -43,6 +43,7 @@ class IceTracksDR2InstrumentResponseFunction( ): _STACK = {} + def __init__(self, data: np.ndarray, season: EventType): """ DO NOT instantiate it via init, but rather through the class method `load` @@ -125,8 +126,11 @@ def _post_init(self): def create_IRF( self, dec: u.Quantity[u.deg] | None = None, - dec_range: tuple[u.Quantity[u.deg], u.Quantity[u.deg]] = (-90. * u.deg, 90. * u.deg), - show_progress: bool = True + dec_range: tuple[u.Quantity[u.deg], u.Quantity[u.deg]] = ( + -90.0 * u.deg, + 90.0 * u.deg, + ), + show_progress: bool = True, ) -> None: """Create the entire IRF, i.e. energy and angular resolution. @@ -147,8 +151,11 @@ def create_IRF( def create_eres( self, dec: u.Quantity[u.deg] | None = None, - dec_range: tuple[u.Quantity[u.deg], u.Quantity[u.deg]] = (-90. * u.deg, 90. * u.deg), - show_progress: bool = True + dec_range: tuple[u.Quantity[u.deg], u.Quantity[u.deg]] = ( + -90.0 * u.deg, + 90.0 * u.deg, + ), + show_progress: bool = True, ) -> None: """Create the energy resolution. @@ -161,7 +168,7 @@ def create_eres( :type dec_range: tuple[u.Quantity[u.deg], u.Quantity[u.deg]], optional :param show_progress: Display progress bar, defaults to True :type show_progress: bool, optional - """ + """ if dec is not None: dec_idx = np.digitize(dec.to_value(u.deg), self.dec_bin_edges) - 1 @@ -176,12 +183,10 @@ def create_eres( dec_size = dec_max_idx - dec_min_idx for c_d, c_tE in tqdm( - product( - dec_range, range(self.log_tE_bin_centers.size) - ), + product(dec_range, range(self.log_tE_bin_centers.size)), disable=not show_progress, desc="Energy resolution", - total=self.log_tE_bin_centers.size * dec_size + total=self.log_tE_bin_centers.size * dec_size, ): if isinstance( self.recoE_sampling[c_tE][c_d], stats.rv_histogram @@ -190,10 +195,14 @@ def create_eres( self._create_recoE_distribution(c_tE, c_d) @u.quantity_input - def create_ang_res(self, + def create_ang_res( + self, dec: u.Quantity[u.deg] | None = None, - dec_range: tuple[u.Quantity[u.deg], u.Quantity[u.deg]] = (-90. * u.deg, 90. * u.deg), - show_progress: bool = True + dec_range: tuple[u.Quantity[u.deg], u.Quantity[u.deg]] = ( + -90.0 * u.deg, + 90.0 * u.deg, + ), + show_progress: bool = True, ) -> None: """Create angular resolution distributions. The intermediate step of kinematic angle / PSF is irrelevant, @@ -213,7 +222,7 @@ def create_ang_res(self, :type show_progress: bool, optional :raises AssertionError: If not all ang_err_bin_edges within one pair of Etrue and DEC are the same, an AssertionError is raised - """ + """ if dec is not None: dec_idx = np.digitize(dec.to_value(u.deg), self.dec_bin_edges) - 1 @@ -234,36 +243,48 @@ def create_ang_res(self, (self.log_tE_bin_centers.size, self.sin_dec_bin_centers.size, 20, 20), ) for c_d, c_tE in tqdm( - product( - dec_range, range(self.log_tE_bin_centers.size) - ), + product(dec_range, range(self.log_tE_bin_centers.size)), disable=not show_progress, desc="Angular resolution", - total=self.log_tE_bin_centers.size * dec_size + total=self.log_tE_bin_centers.size * dec_size, ): if isinstance( self.recoE_sampling[c_tE][c_d], stats.rv_histogram ) or isinstance(self.recoE_sampling[c_tE][c_d], DummyPDF): - etrue_data = self.data[self.data[:, self.etrue_idx] == self.log_tE_bin_edges[c_tE]] - data = etrue_data[etrue_data[:, self.dec_idx] == self.dec_bin_edges[c_d]] + etrue_data = self.data[ + self.data[:, self.etrue_idx] == self.log_tE_bin_edges[c_tE] + ] + data = etrue_data[ + etrue_data[:, self.dec_idx] == self.dec_bin_edges[c_d] + ] _, _, data = self._create_recoE_distribution(c_tE, c_d, return_data=True) - all_ang_err_bins = np.empty((self.recoE_bin_edges[c_tE, c_d].size - 1,), dtype=np.ndarray) + all_ang_err_bins = np.empty( + (self.recoE_bin_edges[c_tE, c_d].size - 1,), dtype=np.ndarray + ) for c_rE, rE in enumerate(self.recoE_bin_edges[c_tE, c_d][:-1]): # self.ang_err_hists[c_tE, c_d] = np.empty((self.recoE_hists[c_tE, c_d].size), dtype=np.ndarray) red_data = data[data[:, self.ereco_idx] == rE] - all_ang_err_bins[c_rE] = np.unique(red_data[:, self.ang_err_idx:self.ang_err_idx+2].flatten()) + all_ang_err_bins[c_rE] = np.unique( + red_data[:, self.ang_err_idx : self.ang_err_idx + 2].flatten() + ) ang_err_counts = np.zeros(all_ang_err_bins[c_rE].size - 1) for c_ar, ar in enumerate(all_ang_err_bins[c_rE][:-1]): - ang_err_counts[c_ar] = red_data[red_data[:, self.ang_err_idx] == ar, -1].sum() + ang_err_counts[c_ar] = red_data[ + red_data[:, self.ang_err_idx] == ar, -1 + ].sum() hist = ang_err_counts.copy() - self.ang_err_hists[c_tE, c_d, c_rE] = hist / (hist * np.diff(all_ang_err_bins[c_rE])).sum() + self.ang_err_hists[c_tE, c_d, c_rE] = ( + hist / (hist * np.diff(all_ang_err_bins[c_rE])).sum() + ) for low, high in pairwise(all_ang_err_bins): if not np.all(low == high): # Has been tested in a notebook, should be fine! - raise AssertionError("Not all ang_err bins are the same! Investigate manually and fix me.") - - self.ang_err_bin_edges[c_tE, c_d] = all_ang_err_bins[0].copy() + raise AssertionError( + "Not all ang_err bins are the same! Investigate manually and fix me." + ) + + self.ang_err_bin_edges[c_tE, c_d] = all_ang_err_bins[0].copy() @classmethod def load(cls, season: EventType) -> Self: @@ -315,7 +336,7 @@ def sample_energy( Etrue: u.GeV, seed: int = 42, N: int = 1, - ) -> tuple[np.ndarray, int, np.ndarray]: + ) -> np.ndarray: """Sample reco energy :param coord: Source coordinate, @@ -331,10 +352,13 @@ def sample_energy( :rtype: np.ndarray """ - return self._sample_energy(coord, Etrue, seed, N) - + _, _, recoE_out = self._sample_energy(coord, Etrue, seed, N) + return recoE_out + @u.quantity_input - def sample(self, coord: SkyCoord, Etrue: u.GeV, seed: int = 42, N: int = 1) -> Events: + def sample( + self, coord: SkyCoord, Etrue: u.GeV, seed: int = 42, N: int = 1 + ) -> Events: """Sample reco energy :param coord: Source coordinate, @@ -346,7 +370,7 @@ def sample(self, coord: SkyCoord, Etrue: u.GeV, seed: int = 42, N: int = 1) -> E :type seed: int, optional :param N: Number of events to sample if coord and Etrue are single values, defaults to 1 :type N: int, optional - :return: + :return: :rtype: np.ndarray """ @@ -358,18 +382,22 @@ def sample(self, coord: SkyCoord, Etrue: u.GeV, seed: int = 42, N: int = 1) -> E for idx_e in set_e: _index_e = np.atleast_1d(np.argwhere(idx_e == tE_idx).squeeze()) - reco_idxs = np.digitize(recoE[_index_e], self.recoE_bin_edges[idx_e, c_d]) - 1 + reco_idxs = ( + np.digitize(recoE[_index_e], self.recoE_bin_edges[idx_e, c_d]) - 1 + ) set_rE = np.unique(reco_idxs) for idx_rE in set_rE: random = stats.rv_histogram( - (self.ang_err_hists[idx_e, c_d, idx_rE], self.ang_err_bin_edges[idx_e, c_d]), - density=True + ( + self.ang_err_hists[idx_e, c_d, idx_rE], + self.ang_err_bin_edges[idx_e, c_d], + ), + density=True, ) _index_rE = np.atleast_1d(np.argwhere(idx_rE == reco_idxs).squeeze()) - rvs = random.rvs(size = _index_rE.size) + rvs = random.rvs(size=_index_rE.size) ang_errs_out[_index_e[_index_rE]] = np.deg2rad(np.power(10, rvs)) - deflection_angles = stats.rayleigh.rvs(scale=ang_errs_out) * u.rad # Deflecte like we do in stan: sample rotation axis orthonormal to the event # sample angle `theta` by which we rotate from Rayleigh dist with sampled ang_err as its sigma @@ -381,11 +409,8 @@ def sample(self, coord: SkyCoord, Etrue: u.GeV, seed: int = 42, N: int = 1) -> E rot_axis = self._sample_orthonormal(direction) rotated = self._rotate_around_vector(direction, rot_axis, angle) - - - return ang_errs_out - + def _sample_orthonormal(self, x: np.ndarray) -> np.ndarray: """Sample a vector that is orthonormal to the input @@ -393,28 +418,29 @@ def _sample_orthonormal(self, x: np.ndarray) -> np.ndarray: :type x: np.ndarray :return: Vector orthonormal to input :rtype: np.ndarray - """ + """ v = stats.norm().rvs(size=x.shape) projected = x * np.vecdot(x, v) / np.linalg.norm(x) ortho = v - projected orthonormal = ortho / np.linalg.norm(ortho) return orthonormal - + @u.quantity_input - def _rotate_around_vector(self, rotatee: np.ndarray, axis: np.ndarray, theta: u.rad) -> np.ndarray: - """Rotate a vector around a second vector by an angle - """ + def _rotate_around_vector( + self, rotatee: np.ndarray, axis: np.ndarray, theta: u.rad + ) -> np.ndarray: + """Rotate a vector around a second vector by an angle""" theta = theta.to_value(u.rad) - return axis * np.dot(rotatee, axis) + np.cos(theta) * np.cross(np.cross(axis, rotatee), axis) + np.sin(theta) * np.cross(axis, rotatee) + return ( + axis * np.dot(rotatee, axis) + + np.cos(theta) * np.cross(np.cross(axis, rotatee), axis) + + np.sin(theta) * np.cross(axis, rotatee) + ) def _sample_energy( - self, - coord: SkyCoord, - Etrue: u.GeV, - seed: int = 42, - N: int = 1 - ) -> tuple[np.ndarray, int, np.ndarray]: + self, coord: SkyCoord, Etrue: u.GeV, seed: int = 42, N: int = 1 + ) -> tuple[np.ndarray, int, np.ndarray]: """Sample reco energy of events :param coord: Source coordinate, @@ -521,6 +547,7 @@ def _create_recoE_distribution( return frac_counts, bins, reduced_data return frac_counts, bins + if __name__ == "__main__": from icecube_data_reader.event_types import IC86 diff --git a/icecube_data_reader/utils/utils.py b/icecube_data_reader/utils/utils.py index ed9bdc7..c636465 100644 --- a/icecube_data_reader/utils/utils.py +++ b/icecube_data_reader/utils/utils.py @@ -1,6 +1,25 @@ import numpy as np +class DummyPDF: + def __init__(self): + pass + + def pdf(self, x): + if isinstance(x, np.ndarray): + return np.zeros_like(x) + else: + return 0.0 + + def cdf(self, x): + if isinstance(x, np.ndarray): + return np.zeros_like(x) + else: + return 0.0 + + def rvs(self, size=1, random_state=42): + raise NotImplementedError("Dummies cannot be sampled from!") + class ddict(dict): """ From 7384271944c953d58aaf28fe1e02c99b3df785fc Mon Sep 17 00:00:00 2001 From: Julian Kuhlmann Date: Fri, 5 Jun 2026 10:21:18 +0200 Subject: [PATCH 34/39] sample events --- icecube_data_reader/irf/irf.py | 62 +++++++++++++++++++++------------- 1 file changed, 38 insertions(+), 24 deletions(-) diff --git a/icecube_data_reader/irf/irf.py b/icecube_data_reader/irf/irf.py index 2ddf2f3..ff9f528 100644 --- a/icecube_data_reader/irf/irf.py +++ b/icecube_data_reader/irf/irf.py @@ -10,11 +10,12 @@ from scipy import stats import astropy.units as u from astropy.coordinates import SkyCoord +from astropy.time import Time from tqdm.notebook import tqdm from icecube_data_reader.downloader import available_datasets, data_directory, I3_14 from icecube_data_reader.event_types import EventType from icecube_data_reader.utils.utils import DummyPDF -from icecube_data_reader.events import Events +from icecube_data_reader.events import IceTrackDR2Events from itertools import product import logging @@ -334,7 +335,6 @@ def sample_energy( self, coord: SkyCoord, Etrue: u.GeV, - seed: int = 42, N: int = 1, ) -> np.ndarray: """Sample reco energy @@ -344,21 +344,17 @@ def sample_energy( :type coord: SkyCoord :param Etrue: True neutrino energy :type Etrue: u.GeV - :param seed: Random seed, defaults to 42 - :type seed: int, optional :param N: Number of events to sample if coord and Etrue are single values, defaults to 1 :type N: int, optional :return: Array of reconstructed energies :rtype: np.ndarray """ - _, _, recoE_out = self._sample_energy(coord, Etrue, seed, N) + _, _, recoE_out = self._sample_energy(coord, Etrue, N) return recoE_out @u.quantity_input - def sample( - self, coord: SkyCoord, Etrue: u.GeV, seed: int = 42, N: int = 1 - ) -> Events: + def sample(self, coord: SkyCoord, Etrue: u.GeV, N: int = 1) -> IceTrackDR2Events: """Sample reco energy :param coord: Source coordinate, @@ -366,15 +362,13 @@ def sample( :type coord: SkyCoord :param Etrue: True neutrino energy :type Etrue: u.GeV - :param seed: Random seed, defaults to 42 - :type seed: int, optional :param N: Number of events to sample if coord and Etrue are single values, defaults to 1 :type N: int, optional :return: :rtype: np.ndarray """ - tE_idx, c_d, recoE = self._sample_energy(coord, Etrue, seed, N) + tE_idx, c_d, recoE = self._sample_energy(coord, Etrue, N) set_e = np.unique(tE_idx) recoE = np.atleast_1d(recoE) @@ -395,21 +389,43 @@ def sample( density=True, ) _index_rE = np.atleast_1d(np.argwhere(idx_rE == reco_idxs).squeeze()) - rvs = random.rvs(size=_index_rE.size) + rvs = random.rvs(size=_index_rE.size, random_state=self._random) + # we sample log10(angle/deg), need the angle in rad + # for sampling from the Rayleigh distribution ang_errs_out[_index_e[_index_rE]] = np.deg2rad(np.power(10, rvs)) - deflection_angles = stats.rayleigh.rvs(scale=ang_errs_out) * u.rad + deflection_angles = np.atleast_1d( + stats.rayleigh.rvs(scale=ang_errs_out, random_state=self._random) * u.rad + ) + ang_errs_out = (ang_errs_out * u.rad).to(u.deg) # Deflecte like we do in stan: sample rotation axis orthonormal to the event # sample angle `theta` by which we rotate from Rayleigh dist with sampled ang_err as its sigma # rotate the initial direction/coord by `theta` around rotation axis coord.representation_type = "cartesian" direction = np.array([coord.x, coord.y, coord.z]) - - rot_axis = self._sample_orthonormal(direction) - rotated = self._rotate_around_vector(direction, rot_axis, angle) - - return ang_errs_out + new_directions = np.zeros((3, N)) + for c, angle in enumerate(deflection_angles): + rot_axis = self._sample_orthonormal(direction) + new_directions[:, c] = self._rotate_around_vector( + direction, rot_axis, angle + ) + new_coords = SkyCoord( + x=new_directions[0], + y=new_directions[1], + z=new_directions[2], + frame="icrs", + representation_type="cartesian", + ) + new_coords.representation_type = "spherical" + events = IceTrackDR2Events( + np.power(10, recoE) * u.GeV, + new_coords, + np.full(N, self.season), + ang_errs_out, + Time(np.full(N, 99.0), format="mjd"), + ) + return events def _sample_orthonormal(self, x: np.ndarray) -> np.ndarray: """Sample a vector that is orthonormal to the input @@ -419,8 +435,8 @@ def _sample_orthonormal(self, x: np.ndarray) -> np.ndarray: :return: Vector orthonormal to input :rtype: np.ndarray """ - v = stats.norm().rvs(size=x.shape) - projected = x * np.vecdot(x, v) / np.linalg.norm(x) + v = stats.norm().rvs(size=x.shape, random_state=self._random) + projected = x * np.dot(x, v) / np.linalg.norm(x) ortho = v - projected orthonormal = ortho / np.linalg.norm(ortho) return orthonormal @@ -439,7 +455,7 @@ def _rotate_around_vector( ) def _sample_energy( - self, coord: SkyCoord, Etrue: u.GeV, seed: int = 42, N: int = 1 + self, coord: SkyCoord, Etrue: u.GeV, N: int = 1 ) -> tuple[np.ndarray, int, np.ndarray]: """Sample reco energy of events @@ -448,8 +464,6 @@ def _sample_energy( :type coord: SkyCoord :param Etrue: True neutrino energy :type Etrue: u.GeV - :param seed: Random seed, defaults to 42 - :type seed: int, optional :param N: Number of events to sample if coord and Etrue are single values, defaults to 1 :type N: int, optional :return: Etrue indices, declination index, array of reconstructed energies @@ -476,7 +490,7 @@ def _sample_energy( for idx_e in set_e: _index_e = np.atleast_1d(np.argwhere(idx_e == tE_idx).squeeze()) recoE = self.recoE_sampling[idx_e][c_d].rvs( - size=_index_e.size, random_state=seed + size=_index_e.size, random_state=self._random ) recoE_out[_index_e] = recoE From 839975d411211f7eb2d3eb3075c3a65c5d28fc74 Mon Sep 17 00:00:00 2001 From: Julian Kuhlmann Date: Fri, 5 Jun 2026 10:27:22 +0200 Subject: [PATCH 35/39] add properties, fix imports --- icecube_data_reader/events.py | 3 --- icecube_data_reader/irf/irf.py | 34 ++++++++++++++++++++++++++++------ 2 files changed, 28 insertions(+), 9 deletions(-) diff --git a/icecube_data_reader/events.py b/icecube_data_reader/events.py index c89e26b..d2d8e66 100644 --- a/icecube_data_reader/events.py +++ b/icecube_data_reader/events.py @@ -11,9 +11,6 @@ from astropy.coordinates import SkyCoord from astropy import units as u from astropy.time import Time -import matplotlib.pyplot as plt -from matplotlib import colors -import matplotlib.cm as cm import h5py from time import time as thyme diff --git a/icecube_data_reader/irf/irf.py b/icecube_data_reader/irf/irf.py index ff9f528..52e6906 100644 --- a/icecube_data_reader/irf/irf.py +++ b/icecube_data_reader/irf/irf.py @@ -45,7 +45,12 @@ class IceTracksDR2InstrumentResponseFunction( _STACK = {} - def __init__(self, data: np.ndarray, season: EventType): + def __init__( + self, + data: np.ndarray, + season: EventType, + random_state: np.random.Generator = np.default_rng(seed=42), + ): """ DO NOT instantiate it via init, but rather through the class method `load` @@ -55,8 +60,9 @@ def __init__(self, data: np.ndarray, season: EventType): :type season: EventType """ - self.data = data - self.season = season + self._data = data + self._season = season + self._random = random_state self.etrue_idx = 0 self.dec_idx = 2 @@ -64,12 +70,27 @@ def __init__(self, data: np.ndarray, season: EventType): self.psf_idx = 6 self.ang_err_idx = 8 - self._eres = False - self._ang_res = False - if season in self._STACK: self.__dict__ = self.STACK[season].__dict__ + @property + def random(self): + return self._random + + @random.setter + def random(self, val: np.random.Generator): + if not isinstance(val, np.random.Generator): + raise ValueError("random must be instance of `np.random.Generator`") + self._random = val + + @property + def data(self): + return self._data + + @property + def season(self): + return self._season + def _post_init(self): # Break naming convention because r and t are too close on the keyboard self.recoE_bin_edges = np.empty( @@ -404,6 +425,7 @@ def sample(self, coord: SkyCoord, Etrue: u.GeV, N: int = 1) -> IceTrackDR2Events coord.representation_type = "cartesian" direction = np.array([coord.x, coord.y, coord.z]) + coord.representation_type = "spherical" new_directions = np.zeros((3, N)) for c, angle in enumerate(deflection_angles): rot_axis = self._sample_orthonormal(direction) From 06f2f2cf3eccf8b227e0ac295e864fb8c75559bf Mon Sep 17 00:00:00 2001 From: Julian Kuhlmann Date: Fri, 5 Jun 2026 10:33:19 +0200 Subject: [PATCH 36/39] me stupid --- icecube_data_reader/irf/irf.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/icecube_data_reader/irf/irf.py b/icecube_data_reader/irf/irf.py index 52e6906..a4c2974 100644 --- a/icecube_data_reader/irf/irf.py +++ b/icecube_data_reader/irf/irf.py @@ -49,7 +49,7 @@ def __init__( self, data: np.ndarray, season: EventType, - random_state: np.random.Generator = np.default_rng(seed=42), + random_state: np.random.Generator = np.random.default_rng(seed=42), ): """ DO NOT instantiate it via init, but rather through the class method `load` @@ -410,13 +410,13 @@ def sample(self, coord: SkyCoord, Etrue: u.GeV, N: int = 1) -> IceTrackDR2Events density=True, ) _index_rE = np.atleast_1d(np.argwhere(idx_rE == reco_idxs).squeeze()) - rvs = random.rvs(size=_index_rE.size, random_state=self._random) + rvs = random.rvs(size=_index_rE.size, random_state=self.random) # we sample log10(angle/deg), need the angle in rad # for sampling from the Rayleigh distribution ang_errs_out[_index_e[_index_rE]] = np.deg2rad(np.power(10, rvs)) deflection_angles = np.atleast_1d( - stats.rayleigh.rvs(scale=ang_errs_out, random_state=self._random) * u.rad + stats.rayleigh.rvs(scale=ang_errs_out, random_state=self.random) * u.rad ) ang_errs_out = (ang_errs_out * u.rad).to(u.deg) # Deflecte like we do in stan: sample rotation axis orthonormal to the event @@ -457,7 +457,7 @@ def _sample_orthonormal(self, x: np.ndarray) -> np.ndarray: :return: Vector orthonormal to input :rtype: np.ndarray """ - v = stats.norm().rvs(size=x.shape, random_state=self._random) + v = stats.norm().rvs(size=x.shape, random_state=self.random) projected = x * np.dot(x, v) / np.linalg.norm(x) ortho = v - projected orthonormal = ortho / np.linalg.norm(ortho) @@ -512,7 +512,7 @@ def _sample_energy( for idx_e in set_e: _index_e = np.atleast_1d(np.argwhere(idx_e == tE_idx).squeeze()) recoE = self.recoE_sampling[idx_e][c_d].rvs( - size=_index_e.size, random_state=self._random + size=_index_e.size, random_state=self.random ) recoE_out[_index_e] = recoE From db3d0d0670422e309259fa7fe5fd67e5c3ef5e26 Mon Sep 17 00:00:00 2001 From: Julian Kuhlmann Date: Fri, 5 Jun 2026 10:38:49 +0200 Subject: [PATCH 37/39] add missing dependency --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 09ff39e..386f6be 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,6 +25,7 @@ dependencies = [ "tqdm", "requests", "requests_cache", + "ipywidgets", ] [project.optional-dependencies] From 1890b6e545ffd5afe732a740a1fd0e707cfee361 Mon Sep 17 00:00:00 2001 From: Julian Kuhlmann Date: Fri, 5 Jun 2026 11:18:17 +0200 Subject: [PATCH 38/39] add tests --- icecube_data_reader/events.py | 10 ++++----- icecube_data_reader/irf/irf.py | 7 ------ icecube_data_reader/lifetime.py | 15 ++++++++++--- tests/test_aeff.py | 6 +++++ tests/test_events.py | 19 ++++++++++++++++ tests/test_irf.py | 40 +++++++++++++++++++++++++++++++-- tests/test_lifetime.py | 11 ++++++++- 7 files changed, 90 insertions(+), 18 deletions(-) diff --git a/icecube_data_reader/events.py b/icecube_data_reader/events.py index d2d8e66..cb08c4f 100644 --- a/icecube_data_reader/events.py +++ b/icecube_data_reader/events.py @@ -188,11 +188,11 @@ def remove(self, i: int) -> None: :param i: Event index :type i: int """ - self._energy = np.delete(self._energy, i) - self._coord = np.delete(self._coord, i) - self._unit_vector = np.delete(self._unit_vector, i, axis=0) - self._event_type = np.delete(self._event_type, i) - self._ang_err = np.delete(self._ang_err, i) + self._energies = np.delete(self._energies, i) + self._coords = np.delete(self._coords, i) + self._unit_vectors = np.delete(self._unit_vectors, i, axis=0) + self._types = np.delete(self._types, i) + self._ang_errs = np.delete(self._ang_errs, i) self._mjd = np.delete(self._mjd, i) def to_file( diff --git a/icecube_data_reader/irf/irf.py b/icecube_data_reader/irf/irf.py index a4c2974..665b9f7 100644 --- a/icecube_data_reader/irf/irf.py +++ b/icecube_data_reader/irf/irf.py @@ -582,10 +582,3 @@ def _create_recoE_distribution( if return_data: return frac_counts, bins, reduced_data return frac_counts, bins - - -if __name__ == "__main__": - from icecube_data_reader.event_types import IC86 - - irf = IceTracksDR2InstrumentResponseFunction.load(IC86) - irf.create_IRF() diff --git a/icecube_data_reader/lifetime.py b/icecube_data_reader/lifetime.py index d743622..20ff5cb 100644 --- a/icecube_data_reader/lifetime.py +++ b/icecube_data_reader/lifetime.py @@ -14,7 +14,15 @@ available_datasets, IceCubeData, ) -from icecube_data_reader.event_types import IC40, IC59, IC79, IC86, suffixes, EventType, Refrigerator +from icecube_data_reader.event_types import ( + IC40, + IC59, + IC79, + IC86, + suffixes, + EventType, + Refrigerator, +) import logging @@ -62,7 +70,7 @@ def lifetime_from_mjd( output[s] = time return output - + def mjd_from_season(self, season: EventType | str) -> tuple[float, float]: """Return MJD_min, MJD_max for provided season @@ -75,10 +83,11 @@ def mjd_from_season(self, season: EventType | str) -> tuple[float, float]: for c, v in enumerate([IC40, IC59, IC79, IC86]): if str(v) == str(season): break + else: + raise ValueError(f"Season {season} is not implemented.") return self._times[c, 0], self._times[c, 1] - def lifetime_from_season( self, *seasons: EventType | str ) -> dict[EventType | u.quantity.Quantity[u.yr]]: diff --git a/tests/test_aeff.py b/tests/test_aeff.py index e69de29..06e90cd 100644 --- a/tests/test_aeff.py +++ b/tests/test_aeff.py @@ -0,0 +1,6 @@ +from icecube_data_reader.irf.effective_area import IceTrackDR2EffectiveArea +from icecube_data_reader.event_types import IC86 + + +def test_load(): + aeff = IceTrackDR2EffectiveArea.load(IC86) diff --git a/tests/test_events.py b/tests/test_events.py index 879d81f..2d09eb2 100644 --- a/tests/test_events.py +++ b/tests/test_events.py @@ -50,6 +50,14 @@ def test_selecting(test_saving): assert et == events.int_types[0] +def test_removing(test_saving): + idx = 5 + _, events = test_saving + N = events.N + events.remove(idx) + assert events.N == N - 1 + + def test_erroneous_selecting(test_saving): idx = 5 events = IceTrackDR2Events.from_event_files(IC40) @@ -61,3 +69,14 @@ def test_erroneous_selecting(test_saving): mask = np.zeros(N + 1, dtype=bool) mask[1] = 1 events.select(mask) + + +def test_energy_cut(test_saving): + _, events = test_saving + + Emin = 5e3 * u.GeV + Emax = 7e4 * u.GeV + + events.apply_energy_cut(Emin=Emin, Emax=Emax) + assert np.all(events.energies <= Emax) + assert np.all(events.energies >= Emin) diff --git a/tests/test_irf.py b/tests/test_irf.py index 73458bb..60467a2 100644 --- a/tests/test_irf.py +++ b/tests/test_irf.py @@ -7,9 +7,14 @@ from astropy.coordinates import SkyCoord -def test_eres(): +@pytest.fixture +def irf(): irf = IceTracksDR2InstrumentResponseFunction.load(IC86) - irf.create_eres() + irf.create_eres(show_progress=False) + return irf + + +def test_eres(irf): Etrue = 10**2.25 * u.GeV et_idx = np.digitize(Etrue, irf.tE_bin_edges) - 1 @@ -26,3 +31,34 @@ def test_eres(): cutoff = pdf >= 1e-2 assert np.all(pytest.approx(pdf[cutoff], abs=1e-2) == n[cutoff]) + + +def test_orthonormal(irf): + vec = np.array([0.0, 0.0, 1.0]) + orthonormal = irf._sample_orthonormal(vec) + assert pytest.approx(np.dot(vec, orthonormal)) == 0.0 + assert pytest.approx(np.linalg.norm(orthonormal)) == 1.0 + + +def test_rotation(irf): + vec = np.array([0.0, 0.0, 1.0]) + rotated = irf._rotate_around_vector( + vec, np.array([1.0, 0.0, 0.0]), np.pi / 2 * u.rad + ) + assert pytest.approx(np.dot(vec, rotated)) == 0.0 + assert pytest.approx(rotated[0]) == 0 + assert pytest.approx(rotated[1]) == -1.0 + assert pytest.approx(rotated[2]) == 0.0 + + +def test_ang_res(irf): + coord = SkyCoord(ra=90 * u.deg, dec=5 * u.deg) + irf.create_ang_res(dec=coord.dec, show_progress=False) + + N = 100 + Etrue = 4e4 * u.GeV + events = irf.sample(coord, Etrue, N=N) + assert events.N == 100 + assert np.unique(events.coords.ra).size == 100 + assert np.unique(events.coords.dec).size == 100 + assert np.unique(events.coords.separation(coord)).size == 100 diff --git a/tests/test_lifetime.py b/tests/test_lifetime.py index 497d78d..3be4cb7 100644 --- a/tests/test_lifetime.py +++ b/tests/test_lifetime.py @@ -23,5 +23,14 @@ def test_lt_from_mjd(): comparison = np.sum(np.diff(lt._data[IC40])[:2]) assert pytest.approx(obs_time) == comparison + def test_mjd_from_season(): - pass + lt = IceTrackDR2LifeTime() + for s in DR2.available_irfs: + mjd_min, mjd_max = lt.mjd_from_season(s) + assert mjd_min == lt._data[s][0, 0] + assert mjd_max == lt.data[s][-1, 1] + + with pytest.raises(ValueError) as e: + lt.mjd_from_season("potato") + assert "potato" in e From d20ce2086e3ceb589858de457b29464eb94d6523 Mon Sep 17 00:00:00 2001 From: Julian Kuhlmann Date: Fri, 5 Jun 2026 11:21:09 +0200 Subject: [PATCH 39/39] add dependency --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 386f6be..7c6bd02 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,6 +26,7 @@ dependencies = [ "requests", "requests_cache", "ipywidgets", + "pandas", ] [project.optional-dependencies]