Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
183 changes: 112 additions & 71 deletions icecube_data_reader/irf/irf.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,9 +70,6 @@ def __init__(
self.psf_idx = 6
self.ang_err_idx = 8

if season in self._STACK:
self.__dict__ = self.STACK[season].__dict__

@property
def random(self):
return self._random
Expand All @@ -93,44 +90,29 @@ def season(self):

def _post_init(self):
# Break naming convention because r and t are too close on the keyboard
self.recoE_bin_edges = np.empty(
(self.log_tE_bin_centers.size, self.sin_dec_bin_centers.size),
dtype=np.ndarray,
self.recoE_bin_edges = np.zeros(
(self.log_tE_bin_centers.size, self.sin_dec_bin_centers.size, 21),
)
# Create empty array for rv_histograms storing the energy resolution
# for each bin of true energy and true declination
self.recoE_hists = np.empty(
(self.log_tE_bin_centers.size, self.sin_dec_bin_centers.size),
dtype=np.ndarray,
self.recoE_hists = np.zeros(
(self.log_tE_bin_centers.size, self.sin_dec_bin_centers.size, 20),
)
self.recoE_sampling = np.empty(
(self.log_tE_bin_centers.size, self.sin_dec_bin_centers.size),
dtype=stats.rv_histogram,
)
# Use lists here for possibly different numbers of bins for each ereco/psf/angerr histogram
self.psf_hists = np.empty(
(self.log_tE_bin_centers.size, self.sin_dec_bin_centers.size),
dtype=np.ndarray,
)
self.psf_bin_edges = np.empty(
(self.log_tE_bin_centers.size, self.sin_dec_bin_centers.size),
dtype=np.ndarray,
)
self.psf_sampling = np.empty(
(self.log_tE_bin_centers.size, self.sin_dec_bin_centers.size),
dtype=np.ndarray,
self.ang_err_bin_edges = np.zeros(
(self.log_tE_bin_centers.size, self.sin_dec_bin_centers.size, 21),
)
self.ang_err_hists = np.empty(
(self.log_tE_bin_centers.size, self.sin_dec_bin_centers.size),
dtype=np.ndarray,
self.ang_err_hists = np.zeros(
(self.log_tE_bin_centers.size, self.sin_dec_bin_centers.size, 20, 20, 20),
)
self.ang_err_bin_edges = np.empty(
(self.log_tE_bin_centers.size, self.sin_dec_bin_centers.size),
dtype=np.ndarray,
self.psf_bin_edges = np.zeros(
(self.log_tE_bin_centers.size, self.sin_dec_bin_centers.size, 21)
)
self.ang_err_sampling = np.empty(
(self.log_tE_bin_centers.size, self.sin_dec_bin_centers.size),
dtype=np.ndarray,
self.psf_hists = np.zeros(
(self.log_tE_bin_centers.size, self.sin_dec_bin_centers.size, 20, 20)
)

self.faulty = []
Expand Down Expand Up @@ -167,7 +149,9 @@ def create_IRF(
:type show_progress: bool, optional
"""

self.create_ang_res(dec, dec_range, show_progress)
self.create_ang_res(
dec, dec_range, show_progress, desc="Energy and angular resolution"
)

@u.quantity_input
def create_eres(
Expand All @@ -178,6 +162,7 @@ def create_eres(
90.0 * u.deg,
),
show_progress: bool = True,
desc: str = "Energy resolution",
) -> None:
"""Create the energy resolution.

Expand All @@ -190,6 +175,8 @@ def create_eres(
:type dec_range: tuple[u.Quantity[u.deg], u.Quantity[u.deg]], optional
:param show_progress: Display progress bar, defaults to True
:type show_progress: bool, optional
:param desc: Description string for progress bar, defaults to "Energy resolution"
:type desc: str, optional
"""

if dec is not None:
Expand All @@ -207,7 +194,7 @@ def create_eres(
for c_d, c_tE in tqdm(
product(dec_range, range(self.log_tE_bin_centers.size)),
disable=not show_progress,
desc="Energy resolution",
desc=desc,
total=self.log_tE_bin_centers.size * dec_size,
):
if isinstance(
Expand All @@ -225,6 +212,7 @@ def create_ang_res(
90.0 * u.deg,
),
show_progress: bool = True,
desc: str = "Angular resolution",
) -> None:
"""Create angular resolution distributions.
The intermediate step of kinematic angle / PSF is irrelevant,
Expand All @@ -242,6 +230,8 @@ def create_ang_res(
:type dec_range: tuple[u.Quantity[u.deg], u.Quantity[u.deg]], optional
:param show_progress: Display progress bar, defaults to True
:type show_progress: bool, optional
:param desc: Description string for progress bar, defaults to "Angular resolution"
:type desc: str, optional
:raises AssertionError: If not all ang_err_bin_edges within one pair of Etrue and DEC
are the same, an AssertionError is raised
"""
Expand All @@ -258,55 +248,84 @@ def create_ang_res(
dec_range = range(dec_min_idx, dec_max_idx)
dec_size = dec_max_idx - dec_min_idx

self.ang_err_bin_edges = np.zeros(
(self.log_tE_bin_centers.size, self.sin_dec_bin_centers.size, 21),
)
self.ang_err_hists = np.zeros(
(self.log_tE_bin_centers.size, self.sin_dec_bin_centers.size, 20, 20),
)
for c_d, c_tE in tqdm(
product(dec_range, range(self.log_tE_bin_centers.size)),
disable=not show_progress,
desc="Angular resolution",
desc=desc,
total=self.log_tE_bin_centers.size * dec_size,
):
if isinstance(
self.recoE_sampling[c_tE][c_d], stats.rv_histogram
) or isinstance(self.recoE_sampling[c_tE][c_d], DummyPDF):
if isinstance(self.recoE_sampling[c_tE][c_d], stats.rv_histogram):
etrue_data = self.data[
self.data[:, self.etrue_idx] == self.log_tE_bin_edges[c_tE]
]
data = etrue_data[
etrue_data[:, self.dec_idx] == self.dec_bin_edges[c_d]
]
_, _, data = self._create_recoE_distribution(c_tE, c_d, return_data=True)
elif isinstance(self.recoE_sampling[c_tE][c_d], DummyPDF):
self.psf_bin_edges[c_tE, c_d] = np.arange(21)
self.ang_err_bin_edges[c_tE, c_d] = np.arange(21)
else:
_, _, data = self._create_recoE_distribution(
c_tE, c_d, return_data=True
)

all_ang_err_bins = np.empty(
(self.recoE_bin_edges[c_tE, c_d].size - 1,), dtype=np.ndarray
)
all_ang_err_bins = []
all_psf_bins = []
if data is None:
self.psf_bin_edges[c_tE, c_d] = np.arange(21)
self.ang_err_bin_edges[c_tE, c_d] = np.arange(21)
continue
for c_rE, rE in enumerate(self.recoE_bin_edges[c_tE, c_d][:-1]):
# self.ang_err_hists[c_tE, c_d] = np.empty((self.recoE_hists[c_tE, c_d].size), dtype=np.ndarray)
red_data = data[data[:, self.ereco_idx] == rE]
all_ang_err_bins[c_rE] = np.unique(
red_data[:, self.ang_err_idx : self.ang_err_idx + 2].flatten()
all_psf_bins.append(
np.unique(red_data[:, self.psf_idx : self.psf_idx + 2].flatten())
)
psf_counts = np.zeros(all_psf_bins[-1].size - 1)
all_ang_err_bins.append(
np.unique(
red_data[:, self.ang_err_idx : self.ang_err_idx + 2].flatten()
)
)
ang_err_counts = np.zeros(all_ang_err_bins[c_rE].size - 1)
for c_ar, ar in enumerate(all_ang_err_bins[c_rE][:-1]):
ang_err_counts = np.zeros(all_ang_err_bins[-1].size - 1)
for c_p, p in enumerate(all_psf_bins[-1][:-1]):
psf_red_data = red_data[red_data[:, self.psf_idx] == p]
psf_counts[c_p] = psf_red_data[:, -1].sum()
for c_a, a in enumerate(all_ang_err_bins[-1][:-1]):
count = psf_red_data[
psf_red_data[:, self.ang_err_idx] == a
].squeeze()
ang_err_counts[c_a] = count[-1]
ang_err_hist = ang_err_counts.copy()
if np.any(ang_err_hist):
self.ang_err_hists[c_tE, c_d, c_rE, c_p] = (
ang_err_hist
/ (ang_err_hist * np.diff(all_ang_err_bins[-1])).sum()
)
psf_hist = psf_counts.copy()
if np.any(psf_hist):
self.psf_hists[c_tE, c_d, c_rE] = (
psf_hist / (psf_hist * np.diff(all_psf_bins[-1])).sum()
)
# repeat for marginalised ang_err (TODO: replace properly)
for c_ar, ar in enumerate(all_ang_err_bins[-1][:-1]):
ang_err_counts[c_ar] = red_data[
red_data[:, self.ang_err_idx] == ar, -1
].sum()
hist = ang_err_counts.copy()
self.ang_err_hists[c_tE, c_d, c_rE] = (
hist / (hist * np.diff(all_ang_err_bins[c_rE])).sum()
)
for low, high in pairwise(all_ang_err_bins):
if not np.all(low == high):
# Has been tested in a notebook, should be fine!
raise AssertionError(
"Not all ang_err bins are the same! Investigate manually and fix me."
)
for low, high in pairwise(all_psf_bins):
if not np.all(low == high):
# Has been tested in a notebook, should be fine!
raise AssertionError(
"Not all psf bins are the same! Investigate manually and fix me."
)

self.ang_err_bin_edges[c_tE, c_d] = all_ang_err_bins[0].copy()
self.psf_bin_edges[c_tE, c_d] = all_psf_bins[0].copy()

@classmethod
def load(cls, season: EventType) -> Self:
Expand Down Expand Up @@ -338,6 +357,12 @@ def load(cls, season: EventType) -> Self:
# use log binning for angular quantities
data[:, 6:-1] = np.log10(data[:, 6:-1])
irf = cls(data, season)
if season in cls._STACK:
logger.info(f"loading IRF for {season} from stack")
irf.__dict__ = cls._STACK[season].__dict__
return irf
else:
cls._STACK[season] = irf
irf.log_tE_bin_edges = log_tE_bin_edges
irf.log_tE_bin_centers = log_tE_bin_centers
irf.tE_bin_edges = tE_bin_edges
Expand Down Expand Up @@ -394,6 +419,7 @@ def sample(self, coord: SkyCoord, Etrue: u.GeV, N: int = 1) -> IceTrackDR2Events

recoE = np.atleast_1d(recoE)
ang_errs_out = np.zeros(recoE.size)
psf = np.zeros(recoE.size)

for idx_e in set_e:
_index_e = np.atleast_1d(np.argwhere(idx_e == tE_idx).squeeze())
Expand All @@ -404,30 +430,46 @@ def sample(self, coord: SkyCoord, Etrue: u.GeV, N: int = 1) -> IceTrackDR2Events
for idx_rE in set_rE:
random = stats.rv_histogram(
(
self.ang_err_hists[idx_e, c_d, idx_rE],
self.ang_err_bin_edges[idx_e, c_d],
self.psf_hists[idx_e, c_d, idx_rE],
self.psf_bin_edges[idx_e, c_d],
),
density=True,
)
_index_rE = np.atleast_1d(np.argwhere(idx_rE == reco_idxs).squeeze())
rvs = random.rvs(size=_index_rE.size, random_state=self.random)
# we sample log10(angle/deg), need the angle in rad
# for sampling from the Rayleigh distribution
ang_errs_out[_index_e[_index_rE]] = np.deg2rad(np.power(10, rvs))
psf[_index_e[_index_rE]] = np.deg2rad(np.power(10, rvs))
psf_idxs = (
np.digitize(
psf[_index_e[_index_rE]], self.psf_bin_edges[idx_e, c_d]
)
- 1
)
set_psf = np.unique(psf_idxs)
for idx_psf in set_psf:
_index_psf = np.atleast_1d(
np.argwhere(idx_psf == psf_idxs).squeeze()
)
random = stats.rv_histogram(
(
self.ang_err_hists[idx_e, c_d, idx_rE, idx_psf],
self.ang_err_bin_edges[idx_e, c_d],
),
density=True,
)
rvs = random.rvs(size=_index_psf.size, random_state=self.random)
ang_errs_out[_index_e[_index_rE[_index_psf]]] = np.power(10, rvs)

deflection_angles = np.atleast_1d(
stats.rayleigh.rvs(scale=ang_errs_out, random_state=self.random) * u.rad
)
ang_errs_out = (ang_errs_out * u.rad).to(u.deg)
ang_errs_out = ang_errs_out * u.deg
psf = psf * u.rad
# Deflecte like we do in stan: sample rotation axis orthonormal to the event
# sample angle `theta` by which we rotate from Rayleigh dist with sampled ang_err as its sigma
# rotate the initial direction/coord by `theta` around rotation axis
# angle psf determines the amount of deflection, ang_err determines the reconstructed
# angular uncertainty

coord.representation_type = "cartesian"
direction = np.array([coord.x, coord.y, coord.z])
coord.representation_type = "spherical"
new_directions = np.zeros((3, N))
for c, angle in enumerate(deflection_angles):
for c, angle in enumerate(psf):
rot_axis = self._sample_orthonormal(direction)
new_directions[:, c] = self._rotate_around_vector(
direction, rot_axis, angle
Expand Down Expand Up @@ -546,7 +588,6 @@ def _create_recoE_distribution(

if (c_e, c_d) in self.faulty:
self.recoE_bin_edges[c_e, c_d] = np.arange(21)
self.recoE_hists[c_e, c_d] = np.zeros(20)
self.recoE_sampling[c_e, c_d] = DummyPDF()
if return_data:
return None, None, None
Expand All @@ -572,12 +613,12 @@ def _create_recoE_distribution(
reduced_data[b == reduced_data[:, self.ereco_idx], -1]
)

self.recoE_sampling[c_e][c_d] = stats.rv_histogram(
self.recoE_sampling[c_e, c_d] = stats.rv_histogram(
(frac_counts, bins), density=False
)
self.recoE_bin_edges[c_e][c_d] = bins
self.recoE_bin_edges[c_e, c_d] = bins
# summed = frac_counts.sum()
self.recoE_hists[c_e][c_d] = frac_counts / (frac_counts * np.diff(bins)).sum()
self.recoE_hists[c_e, c_d] = frac_counts / (frac_counts * np.diff(bins)).sum()

if return_data:
return frac_counts, bins, reduced_data
Expand Down
8 changes: 7 additions & 1 deletion tests/test_irf.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import pytest

from icecube_data_reader.irf.irf import IceTracksDR2InstrumentResponseFunction
from icecube_data_reader.event_types import IC86
from icecube_data_reader.event_types import IC86, DR2
import numpy as np
import astropy.units as u
from astropy.coordinates import SkyCoord
Expand Down Expand Up @@ -62,3 +62,9 @@ def test_ang_res(irf):
assert np.unique(events.coords.ra).size == 100
assert np.unique(events.coords.dec).size == 100
assert np.unique(events.coords.separation(coord)).size == 100


def test_load():
for et in DR2.available_irfs:
irf = IceTracksDR2InstrumentResponseFunction.load(et)
irf.create_IRF()