# File: tests/test_frame_extraction.py
"""Parity tests for the frame-native seam (`unfao/frame_extraction.py`).

These are the golden-equivalence checks the migration relies on. One dataset is expressed
*both* as the manager's pandas MultiIndex frame and as a views-frames `PredictionFrame`; the
two seams (`extraction` vs `frame_extraction`) must then return **identical primitives**, so
that the representation-free `delivery/` invariants behave the same on either.
"""

import numpy as np
import pandas as pd

from views_postprocessing.unfao import extraction, frame_extraction
from views_postprocessing.unfao.frames import build_prediction_frame

# One dataset, two representations: 3 cells × 2 months (rows deliberately unsorted).
# Each row is a (month_id, priogrid_gid) pair.
_ROWS = [(101, 3), (100, 1), (100, 2), (101, 1), (100, 3), (101, 2)]
_MONTH_COLUMN, _CELL_COLUMN = zip(*_ROWS)
_TIME = np.array(_MONTH_COLUMN, dtype=np.int64)
_UNIT = np.array(_CELL_COLUMN, dtype=np.int64)


def _pandas_frame() -> pd.DataFrame:
    """Build the pandas representation: all-zero predictions on a MultiIndex of `_ROWS`."""
    index = pd.MultiIndex.from_tuples(_ROWS, names=["month_id", "priogrid_gid"])
    values = np.zeros(len(_ROWS))
    return pd.DataFrame({"pred_ln_sb_best": values}, index=index)


def _prediction_frame():
    """Build the frame representation of the same rows via `build_prediction_frame`."""
    values = np.zeros((len(_ROWS), 1), dtype=np.float32)
    return build_prediction_frame(values, _TIME, _UNIT)


def test_cells_of_parity():
    """Both seams report the same set of cells, and it is exactly {1, 2, 3}."""
    from_pandas = extraction.cells_of(_pandas_frame())
    from_frame = frame_extraction.cells_of(_prediction_frame())
    assert from_frame == from_pandas
    assert from_pandas == {1, 2, 3}


def test_months_of_parity():
    """Both seams report the same month array, and it is exactly [100, 101]."""
    from_pandas = extraction.months_of(_pandas_frame())
    from_frame = frame_extraction.months_of(_prediction_frame())
    np.testing.assert_array_equal(from_frame, from_pandas)
    np.testing.assert_array_equal(from_frame, np.array([100, 101]))


def test_months_of_is_int64_ascending():
    """The frame seam returns an int64 array whose entries are in ascending order."""
    months = frame_extraction.months_of(_prediction_frame())
    assert months.dtype == np.int64
    observed = list(months)
    assert observed == sorted(observed)
a/views_postprocessing/unfao/frame_extraction.py b/views_postprocessing/unfao/frame_extraction.py new file mode 100644 index 0000000..9d22843 --- /dev/null +++ b/views_postprocessing/unfao/frame_extraction.py @@ -0,0 +1,39 @@ +"""Frame-native representation seam: extract primitives from a views-frames frame. + +The **frame counterpart** to ``extraction.py`` (the pandas seam). It returns the *same* +primitives — sets of ints, numpy month arrays — so the representation-free +``views_postprocessing.delivery`` invariants consume them unchanged. + +Per the migration design (epic #85): pandas and views-frames do **not** coexist at runtime, +so these are deliberately **siblings** of the pandas readers in ``extraction.py``, not a +replacement, and there is **no shared ``Extractor`` Protocol** (a polymorphic interface +nobody dispatches on would be speculative — YAGNI/ISP). When the forecast interior moves to a +frame (S3 / #88), the manager calls *these*; the pandas readers stay for the still-pandas +historical path (gated on C-40 / S7). + +Scope: the readers the forecast interior needs — distinct cells and months from the frame's +index. Deliberately **not** here yet (no speculative code): +- the pandas→``(N, S)`` sample-array unpacker — added when rusty_bucket (#143) declares the + layout (the seam will be *told* the layout, never sniff it); +- a frame-native ``unmapped_cell_count`` — geographic metadata lives on the pandas enriched + frame, not the value frame, until the enrichment moves off pandas (S4 / #89); +- a frame-native ``drop_months_above`` — the observed-range clip is on the *historical* + frame, which is gated on the inbound retirement (S7 / #92). 
+""" + +from __future__ import annotations + +import numpy as np +from numpy.typing import NDArray + +from views_frames import PredictionFrame + + +def cells_of(frame: PredictionFrame) -> set[int]: + """The set of PRIO-GRID cell ids present in the frame (its index ``unit`` axis).""" + return {int(x) for x in np.unique(frame.index.unit)} + + +def months_of(frame: PredictionFrame) -> NDArray[np.int64]: + """The distinct month ids present in the frame, ascending (its index ``time`` axis).""" + return np.unique(np.asarray(frame.index.time, dtype=np.int64))