Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 47 additions & 0 deletions tests/test_frame_extraction.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
"""Parity tests for the frame-native seam (`unfao/frame_extraction.py`).

The golden-equivalence proof the migration relies on: the same data expressed *both* as the
manager's pandas MultiIndex frame and as a views-frames `PredictionFrame` must yield
**identical primitives** through the two seams (`extraction` vs `frame_extraction`), so the
representation-free `delivery/` invariants behave the same on either.
"""

import numpy as np
import pandas as pd

from views_postprocessing.unfao import extraction, frame_extraction
from views_postprocessing.unfao.frames import build_prediction_frame

# A single dataset shared by both representations: 3 cells × 2 months. Each row is a
# (month_id, priogrid_gid) pair, and the rows are intentionally left unsorted.
_ROWS = [(101, 3), (100, 1), (100, 2), (101, 1), (100, 3), (101, 2)]
# Transpose the rows into a month column and a cell column, each as a 1-D int64 array.
_TIME, _UNIT = (np.array(column, dtype=np.int64) for column in zip(*_ROWS))


def _pandas_frame() -> pd.DataFrame:
    """Return the shared dataset as a pandas frame.

    The frame is indexed by a ``(month_id, priogrid_gid)`` MultiIndex built from ``_ROWS``
    and carries a single all-zero ``pred_ln_sb_best`` column.
    """
    index = pd.MultiIndex.from_tuples(_ROWS, names=["month_id", "priogrid_gid"])
    zeros = np.zeros(len(_ROWS))
    return pd.DataFrame({"pred_ln_sb_best": zeros}, index=index)


def _prediction_frame():
    """Return the shared dataset as built by ``build_prediction_frame``.

    The values are an all-zero float32 array with one row per entry of ``_ROWS`` and a
    single column; ``_TIME`` and ``_UNIT`` are passed alongside it, in that order.
    """
    values = np.zeros((len(_ROWS), 1), dtype=np.float32)
    return build_prediction_frame(values, _TIME, _UNIT)


def test_cells_of_parity():
    """Both seams must report the same cell set, and that set must be ``{1, 2, 3}``."""
    pandas_frame, prediction_frame = _pandas_frame(), _prediction_frame()

    cells_via_frame = frame_extraction.cells_of(prediction_frame)
    cells_via_pandas = extraction.cells_of(pandas_frame)

    # Parity between the two seams first, then the golden value for this dataset.
    assert cells_via_frame == cells_via_pandas
    assert cells_via_pandas == {1, 2, 3}


def test_months_of_parity():
    """Both seams must report the same month array, and it must equal ``[100, 101]``."""
    pandas_frame, prediction_frame = _pandas_frame(), _prediction_frame()

    # Parity: the frame seam and the pandas seam agree element-for-element.
    months_via_frame = frame_extraction.months_of(prediction_frame)
    months_via_pandas = extraction.months_of(pandas_frame)
    np.testing.assert_array_equal(months_via_frame, months_via_pandas)

    # Golden value: the distinct months of the shared dataset.
    expected_months = np.array([100, 101])
    np.testing.assert_array_equal(
        frame_extraction.months_of(prediction_frame), expected_months
    )


def test_months_of_is_int64_ascending():
    """The frame seam's month array must be int64 and already in sorted order."""
    months = frame_extraction.months_of(_prediction_frame())

    assert months.dtype == np.int64
    # Sorting must be a no-op, i.e. the values come back in non-decreasing order.
    assert list(months) == sorted(months)
39 changes: 39 additions & 0 deletions views_postprocessing/unfao/frame_extraction.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
"""Frame-native representation seam: extract primitives from a views-frames frame.

The **frame counterpart** to ``extraction.py`` (the pandas seam). It returns the *same*
primitives — sets of ints, numpy month arrays — so the representation-free
``views_postprocessing.delivery`` invariants consume them unchanged.

Per the migration design (epic #85): pandas and views-frames do **not** coexist at runtime,
so these are deliberately **siblings** of the pandas readers in ``extraction.py``, not a
replacement, and there is **no shared ``Extractor`` Protocol** (a polymorphic interface
nobody dispatches on would be speculative — YAGNI/ISP). When the forecast interior moves to a
frame (S3 / #88), the manager calls *these*; the pandas readers stay for the still-pandas
historical path (gated on C-40 / S7).

Scope: the readers the forecast interior needs — distinct cells and months from the frame's
index. Deliberately **not** here yet (no speculative code):
- the pandas→``(N, S)`` sample-array unpacker — added when rusty_bucket (#143) declares the
layout (the seam will be *told* the layout, never sniff it);
- a frame-native ``unmapped_cell_count`` — geographic metadata lives on the pandas enriched
frame, not the value frame, until the enrichment moves off pandas (S4 / #89);
- a frame-native ``drop_months_above`` — the observed-range clip is on the *historical*
frame, which is gated on the inbound retirement (S7 / #92).
"""

from __future__ import annotations

import numpy as np
from numpy.typing import NDArray

from views_frames import PredictionFrame


def cells_of(frame: PredictionFrame) -> set[int]:
    """Return the distinct PRIO-GRID cell ids found on the frame's index ``unit`` axis.

    Each id is converted to a built-in ``int`` before it is placed in the set.
    """
    distinct_units = np.unique(frame.index.unit)
    return set(map(int, distinct_units))


def months_of(frame: PredictionFrame) -> NDArray[np.int64]:
    """Return the distinct month ids on the frame's index ``time`` axis, in ascending order.

    The time axis is coerced to int64 before de-duplication, so the result is an int64 array.
    """
    month_ids = np.asarray(frame.index.time, dtype=np.int64)
    return np.unique(month_ids)
Loading