diff --git a/evaluation_dashboard_app/.dockerignore b/evaluation_dashboard_app/.dockerignore
index 08992ee..ea95081 100644
--- a/evaluation_dashboard_app/.dockerignore
+++ b/evaluation_dashboard_app/.dockerignore
@@ -3,6 +3,8 @@ __pycache__
 .git
 .gitignore
 *.md
+!Readme.md
+!Readme.en.md
 .env
 .venv
 venv
diff --git a/evaluation_dashboard_app/Dockerfile b/evaluation_dashboard_app/Dockerfile
index 891c8a8..b45eb83 100644
--- a/evaluation_dashboard_app/Dockerfile
+++ b/evaluation_dashboard_app/Dockerfile
@@ -5,6 +5,7 @@
 # Build example: docker build --secret id=ssh,src=$HOME/.ssh/id_rsa -t evaluation-dashboard .
 # Match ROS distro at build: --build-arg ROS_DISTRO=humble (or iron, jazzy, etc.)
 ARG ROS_DISTRO=humble
+ARG WEBAUTOAUTHCLI_COMMIT=204629123fa58ab0be0966c795703324e34851ec
 FROM ros:${ROS_DISTRO}
 
 WORKDIR /app
@@ -12,13 +13,29 @@ WORKDIR /app
 # Make ROS_DISTRO available inside the container at runtime (for entrypoint sourcing)
 ENV ROS_DISTRO=${ROS_DISTRO}
 
-# Install needed system packages: python, geos, git/ssh (for pip install from private repos), pipx, OpenGL for matplotlib
+# Install needed system packages: python, geos, git/ssh (for pip install from private repos), pipx,
+# OpenGL for matplotlib, and Chrome for Kaleido static image export.
 RUN apt-get update \
  && DEBIAN_FRONTEND=noninteractive \
     apt-get install -y --no-install-recommends \
-      python3-pip python3-venv libgeos-c1v5 bash git openssh-client libgl1-mesa-glx libgl1-mesa-dri \
+      python3-pip python3-venv libgeos-c1v5 bash git openssh-client \
+      libgl1-mesa-glx libgl1-mesa-dri ca-certificates curl gnupg \
+      fontconfig fonts-noto-cjk fonts-ipafont-gothic \
+ && install -d -m 0755 /etc/apt/keyrings \
+ && curl -fsSL https://dl.google.com/linux/linux_signing_key.pub \
+    | gpg --dearmor -o /etc/apt/keyrings/google-chrome.gpg \
+ && chmod a+r /etc/apt/keyrings/google-chrome.gpg \
+ && echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/google-chrome.gpg] https://dl.google.com/linux/chrome/deb/ stable main" \
+    > /etc/apt/sources.list.d/google-chrome.list \
+ && apt-get update \
+ && DEBIAN_FRONTEND=noninteractive \
+    apt-get install -y --no-install-recommends google-chrome-stable \
+ && ln -sf /usr/bin/google-chrome-stable /usr/bin/google-chrome \
+ && fc-cache -f \
  && rm -rf /var/lib/apt/lists/*
 
+ENV CHROME_BIN=/usr/bin/google-chrome-stable
+
 # Upgrade pip, install pipx, ensure pipx path available
 RUN python3 -m pip install --no-cache-dir --upgrade pip setuptools wheel packaging pipx \
  && pipx ensurepath
@@ -32,18 +49,32 @@ RUN --mount=type=secret,id=ssh,dst=/tmp/ssh_key \
  && chmod 700 /root/.ssh \
  && chmod 600 /root/.ssh/id_rsa \
  && export GIT_SSH_COMMAND="ssh -i /root/.ssh/id_rsa -o StrictHostKeyChecking=accept-new" \
+ && python3 -m pip install --no-cache-dir "git+ssh://git@github.com/tier4/WebAutoAuthCLI.git@v2.23.1" \
  && python3 -m pip install --no-cache-dir "git+ssh://git@github.com/tier4/webauto-auth-py.git" \
  && pipx install "git+ssh://git@github.com/tier4/v_and_v_util.git" \
  && python3 -m pip install --no-cache-dir "git+ssh://git@github.com/tier4/autoware_perception_evaluation.git" \
- && python3 -m pip install --no-cache-dir "git+ssh://git@github.com/tier4/perception_catalog_analyzer.git" \
+ && python3 -m pip install --no-cache-dir \
+      "bokeh>=3.7.3" \
+      "lz4>=4.4.4" \
+      "pyarrow==19.0.0" \
+      "tabulate>=0.9.0" \
+      "typer>=0.16.0" \
+      "weasyprint>=65.1" \
+ && git clone --depth 1 git@github.com:tier4/perception_catalog_analyzer.git /opt/perception_catalog_analyzer \
+ && python3 -m pip install --no-cache-dir --no-deps -e /opt/perception_catalog_analyzer \
  && rm -rf /root/.ssh # Clean up private key ASAP for security
 
-# Install public dependencies (after SSH deps so SSH failures surface fast)
+# Install public dependencies (after SSH deps so SSH failures surface fast).
+# Keep analyzer runtime deps that are not installed via `-e --no-deps` here too.
 COPY requirements-docker.txt .
 RUN python3 -m pip install --no-cache-dir -r requirements-docker.txt
 
+
 # Copy application code and config
 COPY Overview.py .
+COPY Readme.md .
+COPY Readme.en.md .
+COPY catalogs.json .
 COPY pages/ pages/
 COPY lib/ lib/
 COPY worker/ worker/
diff --git a/evaluation_dashboard_app/Overview.py b/evaluation_dashboard_app/Overview.py
old mode 100644
new mode 100755
index 3a08af7..0843bd4
--- a/evaluation_dashboard_app/Overview.py
+++ b/evaluation_dashboard_app/Overview.py
@@ -1,12 +1,39 @@
 import streamlit as st
 import pandas as pd
+import io
+import urllib.parse
+import zipfile
+import yaml
 from pathlib import Path
 from lib.run_loader import load_run
-from lib.path_utils import get_data_root, get_data_root_display, list_run_directories, path_display
+from lib.path_utils import (
+    get_data_root,
+    get_data_root_display,
+    get_run_display_name,
+    list_run_directories,
+    path_display,
+    resolve_run_subdirectory,
+)
 import plotly.express as px
 import plotly.graph_objects as go
 from lib.user_config import UserConfig
-from lib.summary_compare import build_summary_delta
+from lib.summary_compare import build_summary_delta, summary_delta_overlap_stats
+from lib.overview_pdf_report import build_overview_pdf_report, make_report_filename
+from lib.specsheet_report import (
+    DEFAULT_SPECSHEET_LABELS,
+    DEFAULT_SPECSHEET_PROJECT_ID,
+    DEFAULT_SPECSHEET_TOPIC,
+    DEFAULT_TREND_METADATA_TEXT,
+    collect_candidate_specsheet_labels,
+    generate_specsheet_pdf,
+    get_release_specsheet_context,
+    get_specsheet_artifact_paths,
+    is_specsheet_pdf_fresh,
+    parse_trend_metadata_text,
+    progress_fraction_from_message,
+    resolve_specsheet_generation_run_path,
+    write_trend_metadata,
+)
 from lib.page_chrome import (
     inject_app_page_styles,
     render_loaded_data_section,
@@ -30,12 +57,12 @@
 # ====== CONFIG AND CONSTANTS ======
 st.set_page_config(page_title="Overview", layout="wide", initial_sidebar_state="expanded")
 inject_app_page_styles()
-if running_in_docker():
-    st.sidebar.page_link(
-        "pages/99_Deployment_Debug.py",
-        label="Deployment debug",
-        icon="🐳",
-    )
+# if running_in_docker():
+#     st.sidebar.page_link(
+#         "pages/99_Deployment_Debug.py",
+#         label="Deployment debug",
+#         icon="🐳",
+#     )
 RUN_ROOT = get_data_root()
 PRODUCT_LABEL_JA = {
     "Occlusion-Case": "遮蔽ケース",
@@ -262,7 +289,19 @@ def show_grouped_metrics_plot_multi(df_list, run_labels, group_col, label_map=No
 
 # List run directories (subdirectories in RUN_ROOT)
 run_dirs = list_run_directories()
-run_names = [p.name for p in run_dirs]
+run_names = [get_run_display_name(p) for p in run_dirs]
+
+
+def _coerce_run_param_to_display_name(value: str | None) -> str:
+    """Return the display name for a run parameter, or "" when it is empty or unresolvable."""
+    candidate = str(value or "").strip()
+    # Empty input and exact display-name matches are handed back unchanged.
+    if not candidate or candidate in run_names:
+        return candidate
+    run_dir, problem = resolve_run_subdirectory(candidate)
+    if problem or run_dir is None:
+        return ""
+    return get_run_display_name(run_dir)
 
 if not run_dirs:
     st.warning(f"No runs found in '{get_data_root_display()}'.\n\nPlease add at least one sub-directory with evaluation results, e.g. `{get_data_root_display()}/my_eval_run/`.")
@@ -280,12 +319,14 @@ def show_grouped_metrics_plot_multi(df_list, run_labels, group_col, label_map=No
 
 saved_run_a = user_config.get("overview_run_a", run_names[0] if run_names else "")
 # URL override (only if valid)
-if url_run_a in run_names:
-    saved_run_a = url_run_a
+url_run_a_display = _coerce_run_param_to_display_name(url_run_a)
+if url_run_a_display in run_names:
+    saved_run_a = url_run_a_display
 
 run_a_index = run_names.index(saved_run_a) if saved_run_a in run_names else 0
-run_a_dir = st.sidebar.selectbox("Baseline (A)", run_dirs, index=run_a_index, format_func=lambda p: p.name)
-user_config.set("overview_run_a", run_a_dir.name)
+run_a_dir = st.sidebar.selectbox("Baseline (A)", run_dirs, index=run_a_index, format_func=get_run_display_name)
+run_a_name = get_run_display_name(run_a_dir)
+user_config.set("overview_run_a", run_a_name)
 
 compare_run_names = []  # list of run names for candidates B, C, D, ...
 if mode == "Compare Mode":
@@ -298,7 +339,11 @@ def show_grouped_metrics_plot_multi(df_list, run_labels, group_col, label_map=No
         if not saved_compare and run_names:
             saved_compare = [run_names[1]] if len(run_names) > 1 else [run_names[0]]
         if url_compare_runs:
-            valid_url = [r for r in url_compare_runs if r in run_names]
+            valid_url = [
+                display
+                for display in (_coerce_run_param_to_display_name(r) for r in url_compare_runs)
+                if display in run_names
+            ]
             if valid_url:
                 saved_compare = valid_url
         st.session_state["overview_compare_run_names"] = list(saved_compare)
@@ -315,10 +360,10 @@ def show_grouped_metrics_plot_multi(df_list, run_labels, group_col, label_map=No
                 f"Candidate ({letter})",
                 run_dirs,
                 index=idx,
-                format_func=lambda p: p.name,
+                format_func=get_run_display_name,
                 key=f"compare_run_select_{i}",
             )
-            new_compare_run_names.append(selected.name)
+            new_compare_run_names.append(get_run_display_name(selected))
         with col_rm:
             if len(compare_run_names) > 1:
                 if st.button("✕", key=f"compare_remove_{i}", help="Remove this run"):
@@ -332,7 +377,7 @@ def show_grouped_metrics_plot_multi(df_list, run_labels, group_col, label_map=No
     st.session_state["overview_compare_run_names"] = compare_run_names
 
     if st.sidebar.button("➕ Add run", help="Add another run to compare"):
-        used = {run_a_dir.name} | set(compare_run_names)
+        used = {run_a_name} | set(compare_run_names)
         next_name = next((n for n in run_names if n not in used), run_names[0])
         new_list = compare_run_names + [next_name]
         st.session_state["overview_compare_run_names"] = new_list
@@ -345,13 +390,13 @@ def show_grouped_metrics_plot_multi(df_list, run_labels, group_col, label_map=No
 
 compare_run_dirs = []
 if mode == "Compare Mode" and compare_run_names:
-    name_to_dir = {p.name: p for p in run_dirs}
+    name_to_dir = {get_run_display_name(p): p for p in run_dirs}
     compare_run_dirs = [name_to_dir[n] for n in compare_run_names if n in name_to_dir]
 
 # ====== SYNC URL (NON-DESTRUCTIVE) ======
 query = {
     "mode": "compare" if mode == "Compare Mode" else "single",
-    "run_a": run_a_dir.name,
+    "run_a": run_a_name,
 }
 for j, name in enumerate(compare_run_names):
     query[f"run_{chr(98 + j)}"] = name  # run_b, run_c, ...
@@ -400,17 +445,93 @@ def safe_load_run(path, label='Run'):
         st.session_state.pop(key, None)
 
 # ====== MAIN PAGE METRICS & CHARTS ======
-_ov_entries = [("Baseline · A", path_display(runA["path"]))]
+_ov_entries = [("Baseline · A", get_run_display_name(runA["path"]))]
 if mode == "Compare Mode" and compare_run_dirs:
     all_runs = st.session_state["all_runs"]
     run_labels = st.session_state["run_labels"]
     for i in range(1, len(all_runs)):
-        _ov_entries.append((f"Candidate · {run_labels[i]}", path_display(all_runs[i]["path"])))
+        _ov_entries.append((f"Candidate · {run_labels[i]}", get_run_display_name(all_runs[i]["path"])))
 render_loaded_data_section(_ov_entries)
-share_q = f"mode={'compare' if mode == 'Compare Mode' else 'single'}&run_a={run_a_dir.name}"
+
+if mode == "Compare Mode" and compare_run_dirs:
+    _all_r = st.session_state.get("all_runs")
+    _lbls = st.session_state.get("run_labels")
+    if _all_r and _lbls and all(r.get("summary") is not None for r in _all_r):
+        _cand_stats: list[tuple[str, dict]] = []
+        _overlap_rows: list[dict] = []
+        _empty_labels: list[str] = []
+        _invalid_msgs: list[str] = []
+        for i in range(1, len(_all_r)):
+            cand = _lbls[i]
+            stt = summary_delta_overlap_stats(_all_r[0]["summary"], _all_r[i]["summary"])
+            _cand_stats.append((cand, stt))
+            if not stt.get("valid"):
+                _invalid_msgs.append(f"**{cand}:** {stt.get('error', 'Unknown error')}")
+                continue
+            join_s = " + ".join(stt["key_cols"])
+            _overlap_rows.append(
+                {
+                    "Candidate": cand,
+                    "Join keys": join_s,
+                    "Baseline rows": stt["n_rows_baseline"],
+                    "Candidate rows": stt["n_rows_candidate"],
+                    "Matched (Δ rows)": stt["n_matched_keys"],
+                    "Keys only in A": stt["n_only_baseline"],
+                    "Keys only in candidate": stt["n_only_candidate"],
+                }
+            )
+            if stt["matched_empty"]:
+                _empty_labels.append(cand)
+        if _invalid_msgs:
+            st.warning(
+                "Cannot compute Summary delta alignment for some runs:\n\n"
+                + "\n\n".join(_invalid_msgs)
+            )
+        if _empty_labels:
+            _join_cols = next(
+                (" + ".join(f"`{c}`" for c in s["key_cols"]) for cnd, s in _cand_stats if cnd in _empty_labels and s.get("valid")),
+                "`id` (or `id` + `perception_label` when both have it)",
+            )
+            st.warning(
+                "**TP Summary delta views will be empty** for candidate(s) "
+                f"**{', '.join(_empty_labels)}**: baseline **A** and those runs share **no** overlapping "
+                f"Summary join keys ({_join_cols}). "
+                "The inner join drops every row; use **Baseline** or **Candidate** in the TP Summary sidebar, "
+                "or choose runs whose Summary rows use the same keys. "
+                "Open **Summary key overlap (delta alignment)** below for row counts and sample keys "
+                "that appear on only one side."
+            )
+            with st.expander("Summary key overlap (delta alignment) — details", expanded=False):
+                st.markdown(
+                    "Delta tables on **TP Summary** inner-join baseline **A** to each candidate on the "
+                    "same keys as here: **`id`**, or **`id` + `perception_label`** when both summaries "
+                    "include `perception_label`. Only **matched** keys produce rows; the rest are ignored."
+                )
+                st.dataframe(pd.DataFrame(_overlap_rows), width="stretch", hide_index=True)
+                for cand, stt in _cand_stats:
+                    if not stt.get("valid"):
+                        continue
+                    sb = stt["sample_only_baseline"]
+                    sc = stt["sample_only_candidate"]
+                    if not sb and not sc:
+                        continue
+                    st.markdown(f"**Examples — candidate {cand}**")
+                    c1, c2 = st.columns(2)
+                    with c1:
+                        st.caption("Up to 5 keys only in baseline A")
+                        st.code("\n".join(sb) if sb else "(none)")
+                    with c2:
+                        st.caption(f"Up to 5 keys only in {cand}")
+                        st.code("\n".join(sc) if sc else "(none)")
+
+share_query = {
+    "mode": "compare" if mode == "Compare Mode" else "single",
+    "run_a": run_a_name,
+}
 if mode == "Compare Mode" and compare_run_names:
     for j, name in enumerate(compare_run_names):
-        share_q += f"&run_{chr(98 + j)}={name}"
+        share_query[f"run_{chr(98 + j)}"] = name
+share_q = urllib.parse.urlencode(share_query)
 render_share_link_callout(
     share_q,
     caption="Append to your server URL (e.g. `https://host:8501/?` + query). Build links from Data Management too.",
@@ -542,3 +663,514 @@ def show_tp_mean_by_label_compare(df_list, run_labels, label_col, label_jp_map=N
     with st.expander("Show metric breakdowns by label", expanded=False):
         show_grouped_metrics_plot(df_summary, group_col="perception_label", mode="single")
         show_grouped_metrics_plot(df_summary, group_col="product_label", label_map=PRODUCT_LABEL_JA, mode="single")
+
+
+st.divider()
+section_header("Export Dashboard Report", "Generate a curated PDF from the current Overview selection and filters.")
+_report_runs = st.session_state.get("all_runs") if mode == "Compare Mode" and compare_run_dirs else [runA]
+_report_labels = st.session_state.get("run_labels") if mode == "Compare Mode" and compare_run_dirs else ["A"]
+_report_filters = {
+    "perception_labels": filters.get("perception_labels", []),
+    "product_labels": filters.get("product_labels", []),
+}
+_report_key = {
+    "mode": mode,
+    "paths": [str(r.get("path")) for r in _report_runs],
+    "perception_labels": list(_report_filters["perception_labels"]),
+    "product_labels": list(_report_filters["product_labels"]),
+}
+pdf_col1, pdf_col2 = st.columns([1.2, 2.8])
+with pdf_col1:
+    if st.button("Generate Evaluation Dashboard Report", type="primary", use_container_width=True):
+        _pdf_status = st.empty()
+        try:
+            def _update_pdf_status(message: str) -> None:
+                _pdf_status.info(f"Generating report: {message}")
+
+            _update_pdf_status("starting")
+            pdf_bytes = build_overview_pdf_report(
+                mode=mode,
+                run_records=_report_runs,
+                run_labels=_report_labels,
+                filters=_report_filters,
+                product_label_map=PRODUCT_LABEL_JA,
+                progress_callback=_update_pdf_status,
+            )
+            st.session_state["overview_pdf_report_bytes"] = pdf_bytes
+            st.session_state["overview_pdf_report_key"] = _report_key
+            run_names_for_file = [get_run_display_name(r["path"]) for r in _report_runs if r.get("path") is not None]
+            st.session_state["overview_pdf_report_name"] = make_report_filename(run_names_for_file)
+            _pdf_status.success("PDF report is ready.")
+        except Exception as e:
+            st.session_state.pop("overview_pdf_report_bytes", None)
+            st.session_state.pop("overview_pdf_report_key", None)
+            st.session_state.pop("overview_pdf_report_name", None)
+            _pdf_status.error(f"PDF generation failed: {e}")
+with pdf_col2:
+    _pdf_ready = (
+        st.session_state.get("overview_pdf_report_bytes") is not None
+        and st.session_state.get("overview_pdf_report_key") == _report_key
+    )
+    if _pdf_ready:
+        st.download_button(
+            "Download Evaluation Dashboard Report",
+            data=st.session_state["overview_pdf_report_bytes"],
+            file_name=st.session_state.get("overview_pdf_report_name", "overview_report.pdf"),
+            mime="application/pdf",
+            use_container_width=True,
+        )
+
+specsheet_title = "Export Specsheet Report"
+section_header(
+    specsheet_title,
+    "Generate the release-oriented spec-sheet PDF.",
+)
+
+_specsheet_run_records = _report_runs
+_specsheet_run_labels = _report_labels
+_specsheet_run_options = {}
+for label, record in zip(_specsheet_run_labels, _specsheet_run_records):
+    source_path = record["path"]
+    target_path = resolve_specsheet_generation_run_path(source_path)
+    release_context = get_release_specsheet_context(source_path)
+    option_label = f"{label} · {get_run_display_name(source_path)}"
+    if release_context is not None and target_path != source_path:
+        option_label = (
+            f"{label} · {get_run_display_name(source_path)} "
+            f"(PDF body: {get_run_display_name(target_path)})"
+        )
+    _specsheet_run_options[option_label] = {
+        "source_path": source_path,
+        "target_path": target_path,
+        "release_context": release_context,
+    }
+_specsheet_run_option_keys = list(_specsheet_run_options.keys())
+_default_specsheet_run_selection = _specsheet_run_option_keys[:1]
+_default_specsheet_labels = list(DEFAULT_SPECSHEET_LABELS)
+_default_specsheet_project_id = st.session_state.get("specsheet_project_id", DEFAULT_SPECSHEET_PROJECT_ID)
+_default_specsheet_topic = st.session_state.get("specsheet_topic_name", DEFAULT_SPECSHEET_TOPIC)
+_single_specsheet_run_path = resolve_specsheet_generation_run_path(_specsheet_run_records[0]["path"])
+_default_specsheet_version = get_run_display_name(_single_specsheet_run_path)
+
+if mode == "Compare Mode":
+    selected_specsheet_run_keys = st.multiselect(
+        "Runs to generate spec-sheet for",
+        options=list(_specsheet_run_options.keys()),
+        default=_default_specsheet_run_selection,
+        key="specsheet_target_runs",
+        help="Spec-sheet generation is single-run, so multiple selected runs are processed one by one.",
+    )
+else:
+    selected_specsheet_run_keys = _specsheet_run_option_keys[:1]
+
+_selected_specsheet_entries = [
+    _specsheet_run_options[key]
+    for key in selected_specsheet_run_keys
+    if key in _specsheet_run_options
+]
+selected_specsheet_run_paths = []  # unique generation targets, in selection order
+_seen_specsheet_targets = set()
+for entry in _selected_specsheet_entries:
+    target_path = entry["target_path"]
+    target_key = str(target_path.resolve())  # the resolved path is the de-duplication key
+    if target_key in _seen_specsheet_targets:  # several selections can share one target; keep the first
+        continue
+    selected_specsheet_run_paths.append(target_path)
+    _seen_specsheet_targets.add(target_key)
+selected_specsheet_release_contexts = []  # unique release contexts, in selection order
+_seen_specsheet_releases = set()
+for entry in _selected_specsheet_entries:
+    release_context = entry["release_context"]
+    if release_context is None:  # entries without a release context contribute nothing here
+        continue
+    release_dir = release_context.get("release_dir")
+    release_key = str(release_dir.resolve()) if isinstance(release_dir, Path) else str(release_dir)  # non-Path values fall back to str()
+    if release_key in _seen_specsheet_releases:
+        continue
+    selected_specsheet_release_contexts.append(release_context)
+    _seen_specsheet_releases.add(release_key)
+_active_specsheet_paths = [get_specsheet_artifact_paths(path) for path in selected_specsheet_run_paths]
+_selected_trend_metadata_text = ""
+_selected_trend_metadata_path = None
+if len(selected_specsheet_release_contexts) == 1:
+    candidate_path = selected_specsheet_release_contexts[0].get("metadata")
+    if isinstance(candidate_path, Path) and candidate_path.exists():
+        _selected_trend_metadata_path = candidate_path
+if _selected_trend_metadata_path is None and len(_active_specsheet_paths) == 1 and _active_specsheet_paths[0]["trend_metadata"].exists():
+    _selected_trend_metadata_path = _active_specsheet_paths[0]["trend_metadata"]
+if _selected_trend_metadata_path is not None:
+    try:
+        _selected_trend_metadata_text = _selected_trend_metadata_path.read_text(encoding="utf-8")
+    except Exception:
+        _selected_trend_metadata_text = ""
+
+_selected_metadata_defaults = {}
+if _selected_trend_metadata_text:
+    try:
+        _selected_metadata_defaults = parse_trend_metadata_text(_selected_trend_metadata_text)
+    except Exception:
+        _selected_metadata_defaults = {}
+
+def _specsheet_title_version_from_metadata(metadata: dict) -> str:
+    """Return `version_abbr`, else `pilot_auto_version` without a leading "Pilot.Auto "."""
+    abbreviation = str(metadata.get("version_abbr") or "").strip()
+    full_version = str(metadata.get("pilot_auto_version") or "").strip()
+    prefix = "pilot.auto "  # compared case-insensitively
+    if not abbreviation and full_version.lower().startswith(prefix):
+        return full_version[len(prefix):].strip()
+    return abbreviation or full_version
+
+_metadata_default_version = _specsheet_title_version_from_metadata(_selected_metadata_defaults)
+if _metadata_default_version:  # a version found in saved metadata replaces the run-derived default
+    _default_specsheet_version = _metadata_default_version
+_metadata_trend_topic = str(_selected_metadata_defaults.get("topic_name") or "").strip()
+if (  # a session topic equal to the metadata topic_name is reset to the default topic
+    _metadata_trend_topic
+    and _metadata_trend_topic != DEFAULT_SPECSHEET_TOPIC
+    and st.session_state.get("specsheet_topic_name") == _metadata_trend_topic
+):  # NOTE(review): presumably undoes an earlier auto-fill from metadata; confirm intent
+    st.session_state["specsheet_topic_name"] = DEFAULT_SPECSHEET_TOPIC
+    _default_specsheet_topic = DEFAULT_SPECSHEET_TOPIC
+
+_specsheet_defaults_source = str(_selected_trend_metadata_path or _single_specsheet_run_path)  # identifies where the defaults came from
+_previous_auto_version = st.session_state.get("specsheet_version_auto_value")
+_current_version = st.session_state.get("specsheet_version")
+if (  # overwrite only when the source changed and the field is unset, still auto-filled, or ends with a role suffix
+    st.session_state.get("specsheet_version_auto_source") != _specsheet_defaults_source
+    and (
+        "specsheet_version" not in st.session_state
+        or _current_version == _previous_auto_version
+        or str(_current_version or "").endswith(("/performance", "/devops"))
+    )
+):
+    st.session_state["specsheet_version"] = _default_specsheet_version
+st.session_state["specsheet_version_auto_source"] = _specsheet_defaults_source  # remembered for the next rerun's comparison
+st.session_state["specsheet_version_auto_value"] = _default_specsheet_version
+
+_previous_auto_topic = st.session_state.get("specsheet_topic_auto_value")
+_current_topic = st.session_state.get("specsheet_topic_name")
+if (  # same rule as for the version, without the role-suffix case
+    st.session_state.get("specsheet_topic_auto_source") != _specsheet_defaults_source
+    and (
+        "specsheet_topic_name" not in st.session_state
+        or _current_topic == _previous_auto_topic
+    )
+):
+    st.session_state["specsheet_topic_name"] = _default_specsheet_topic
+st.session_state["specsheet_topic_auto_source"] = _specsheet_defaults_source  # remembered for the next rerun's comparison
+st.session_state["specsheet_topic_auto_value"] = _default_specsheet_topic
+
+# Spec-sheet settings row: project ID, version and topic name.
+specsheet_cfg_col1, specsheet_cfg_col2, specsheet_cfg_col3 = st.columns([1.4, 1.2, 1.4])
+with specsheet_cfg_col1:
+    specsheet_project_id = st.text_input(
+        "Project ID",
+        value=_default_specsheet_project_id,
+        key="specsheet_project_id",
+    ).strip()
+# "specsheet_version" and "specsheet_topic_name" are assigned through
+# st.session_state earlier in this script. Streamlit warns when a key set that
+# way is also created with `value=`, so the fallback default is seeded with
+# setdefault() instead; it leaves an existing value untouched and still
+# covers reruns where the key is absent from Session State.
+with specsheet_cfg_col2:
+    st.session_state.setdefault("specsheet_version", _default_specsheet_version)
+    specsheet_version = st.text_input("Version", key="specsheet_version").strip()
+with specsheet_cfg_col3:
+    st.session_state.setdefault("specsheet_topic_name", _default_specsheet_topic)
+    specsheet_topic_name = st.text_input("Topic name", key="specsheet_topic_name").strip()
+
+_detected_specsheet_labels = []
+for run_path in selected_specsheet_run_paths:
+    _detected_specsheet_labels.extend(collect_candidate_specsheet_labels(run_path))
+specsheet_labels = list(dict.fromkeys(_detected_specsheet_labels or _default_specsheet_labels))
+if specsheet_labels:
+    st.caption(f"Labels: all detected labels ({len(specsheet_labels)})")
+if not selected_specsheet_run_paths:
+    st.info("Pick at least one run to build the release spec-sheet.")
+
+if _selected_trend_metadata_text and "specsheet_include_trend" not in st.session_state:
+    st.session_state["specsheet_include_trend"] = True
+
+_release_trend_status_text = ""
+if selected_specsheet_release_contexts:
+    for release_context in selected_specsheet_release_contexts[:1]:
+        release_dir = release_context.get("release_dir")
+        roles = release_context.get("roles", {})
+        role_status = []
+        if isinstance(roles, dict):
+            for role_name in ("performance", "devops"):
+                role_info = roles.get(role_name)
+                if not isinstance(role_info, dict):
+                    continue
+                bits = []
+                bits.append("summary.json" if role_info.get("has_summary") else "no summary.json")
+                bits.append("metadata.yaml" if role_info.get("has_metadata") else "no metadata.yaml")
+                role_status.append(f"{role_name}: {', '.join(bits)}")
+        release_text = f"Release folder: `{path_display(release_dir)}`." if isinstance(release_dir, Path) else "Release folder detected."
+        if role_status:
+            release_text += " " + "; ".join(role_status) + "."
+        _release_trend_status_text = release_text
+
+trend_toggle_col, trend_status_col = st.columns([1.1, 2.9])
+with trend_toggle_col:  # default is seeded via Session State; `value=` on a key already set there makes Streamlit warn
+    st.session_state.setdefault("specsheet_include_trend", bool(_selected_trend_metadata_text))
+    specsheet_trend_enabled = st.toggle(
+        "Include trend data",
+        key="specsheet_include_trend",
+        help="Save release metadata and include available trend history.",
+    )
+with trend_status_col:
+    if specsheet_trend_enabled and _selected_trend_metadata_path is not None and _selected_trend_metadata_text:
+        st.caption(f"Using saved metadata: `{path_display(_selected_trend_metadata_path)}`")
+    elif specsheet_trend_enabled:
+        st.caption("No saved metadata found. Fill in release metadata below.")
+    if specsheet_trend_enabled and _release_trend_status_text:
+        st.caption(_release_trend_status_text)
+
+trend_metadata_payload = None  # stays None when trend is off or the YAML below fails to parse
+trend_metadata_changed = False
+trend_metadata_change_confirmed = False
+if specsheet_trend_enabled:
+    _trend_metadata_source_key = str(_selected_trend_metadata_path) if _selected_trend_metadata_path is not None else "__default__"
+    if (  # (re)load the editor text when the metadata source changes or nothing is stored yet
+        st.session_state.get("specsheet_trend_metadata_source") != _trend_metadata_source_key
+        or "specsheet_trend_metadata_text" not in st.session_state
+    ):
+        st.session_state["specsheet_trend_metadata_text"] = _selected_trend_metadata_text or DEFAULT_TREND_METADATA_TEXT
+        st.session_state["specsheet_trend_metadata_source"] = _trend_metadata_source_key
+        st.session_state["specsheet_confirm_metadata_changes"] = False  # a new source clears any earlier confirmation
+    trend_metadata_text = st.text_area(
+        "Trend metadata YAML",
+        key="specsheet_trend_metadata_text",
+        height=180,
+        help="Required keys: tags, pilot_auto_version, data_count, description, date.",
+    )
+    trend_metadata_changed = bool(_selected_trend_metadata_text) and (  # only edits to previously saved metadata count as a change
+        trend_metadata_text.strip() != _selected_trend_metadata_text.strip()
+    )
+    if trend_metadata_changed:
+        st.warning("Saved metadata was edited. Confirm before generating.")
+        trend_metadata_change_confirmed = st.checkbox(
+            "Confirm saved metadata changes",
+            key="specsheet_confirm_metadata_changes",
+        )
+    trend_metadata_status = st.empty()  # placeholder that receives either the success or the error message
+    try:
+        trend_metadata_payload = parse_trend_metadata_text(trend_metadata_text)
+        trend_metadata_status.success("Trend metadata looks valid.")
+    except Exception as trend_exc:  # parse failures are shown inline; the payload stays None
+        trend_metadata_status.error(f"Trend metadata error: {trend_exc}")
+
+_specsheet_key = {
+    "run_paths": [str(path) for path in selected_specsheet_run_paths],
+    "project_id": specsheet_project_id,
+    "version": specsheet_version,
+    "topic_name": specsheet_topic_name,
+    "labels": list(specsheet_labels),
+    "include_trend": specsheet_trend_enabled,
+    "trend_metadata": trend_metadata_payload if specsheet_trend_enabled else None,
+    "artifact_kind": "zip" if len(selected_specsheet_run_paths) > 1 else "pdf",
+}
+_specsheet_ready = (
+    st.session_state.get("specsheet_pdf_report_bytes") is not None
+    and st.session_state.get("specsheet_pdf_report_key") == _specsheet_key
+)
+
+def _release_specsheet_pdf_path(release_context: dict, topic_name: str) -> Path | None:
+    """Return the first existing spec-sheet PDF for a release, or None.
+
+    Lookup order under `<release_dir>/specsheet`: `<topic>/specsheet.pdf`,
+    `specsheet.pdf`, then every `*/*.pdf` match in sorted order.
+    """
+    release_dir = release_context.get("release_dir")
+    if not isinstance(release_dir, Path):
+        return None
+    root = release_dir / "specsheet"
+    topic = str(topic_name or "").strip()
+    by_topic = [root / topic / "specsheet.pdf"] if topic else []
+    lookup = [*by_topic, root / "specsheet.pdf", *sorted(root.glob("*/*.pdf"))]
+    # Directories are skipped; only an existing non-directory path is returned.
+    return next((pdf for pdf in lookup if pdf.exists() and not pdf.is_dir()), None)
+
+_release_specsheet_paths = [  # PDFs already present in the selected release folders
+    pdf_path
+    for pdf_path in (
+        _release_specsheet_pdf_path(release_context, specsheet_topic_name)
+        for release_context in selected_specsheet_release_contexts
+    )
+    if pdf_path is not None
+]
+_generated_specsheet_paths = [  # per-run PDFs that exist and that is_specsheet_pdf_fresh() accepts
+    path_info["specsheet_pdf"]
+    for path_info in _active_specsheet_paths
+    if path_info["specsheet_pdf"].exists() and is_specsheet_pdf_fresh(path_info["run_dir"])
+]
+_existing_specsheet_paths = _release_specsheet_paths or _generated_specsheet_paths  # release PDFs take precedence when any exist
+_all_selected_specsheet_pdfs_ready = (  # requires as many existing PDFs as selected runs
+    len(selected_specsheet_run_paths) > 0
+    and len(_existing_specsheet_paths) == len(selected_specsheet_run_paths)
+)
+_specsheet_has_existing_pdf = _specsheet_ready or _all_selected_specsheet_pdfs_ready  # in-session artifact for these settings, or PDFs on disk
+_specsheet_action_label = (  # the button offers regeneration once a PDF already exists
+    "Regenerate Release Spec-sheet PDF"
+    if _specsheet_has_existing_pdf
+    else "Generate Release Spec-sheet PDF"
+)
+
+specsheet_action_col1, specsheet_action_col2 = st.columns([1.2, 2.8])  # Left column: generate button; right column: download area.
+with specsheet_action_col1:
+    if st.button(
+        _specsheet_action_label,
+        type="secondary" if _specsheet_has_existing_pdf else "primary",  # De-emphasise the button when a PDF is already available.
+        use_container_width=True,
+    ):
+        _specsheet_status = st.empty()  # Placeholder reused for info / success / error messages.
+        _specsheet_progress = st.progress(0.0)
+        try:  # Input validation errors and generation failures are both reported by the except branch at the end.
+            if not specsheet_project_id:
+                raise ValueError("Project ID is required.")
+            if not specsheet_version:
+                raise ValueError("Version is required.")
+            if not specsheet_topic_name:
+                raise ValueError("Topic name is required.")
+            if not selected_specsheet_run_paths:
+                raise ValueError("At least one run must be selected.")
+            if specsheet_trend_enabled and len(selected_specsheet_run_paths) != 1:
+                raise ValueError("Trend-enabled release spec-sheet generation currently supports exactly one run.")
+            if specsheet_trend_enabled and trend_metadata_payload is None:
+                raise ValueError("Valid trend metadata is required when trend mode is enabled.")
+            if specsheet_trend_enabled and trend_metadata_changed and not trend_metadata_change_confirmed:
+                raise ValueError("Confirm the metadata.yaml changes before generating.")
+
+            stage_progress = {  # Progress fraction for status messages that are matched verbatim.
+                "Using existing up-to-date spec-sheet PDF": 1.0,
+                "Loading CSV files": 0.15,
+                "Building abstract and detail sections": 0.2,
+                "Validating full trend summary": 0.9,
+                "Saving trend metadata": 0.9,
+                "Collecting trend history": 0.92,
+                "Rendering trend plots": 0.94,
+                "Rendering PDF": 0.95,
+                "Spec-sheet PDF is ready": 1.0,
+            }
+
+            def _update_specsheet_status(message: str) -> None:  # Progress callback: map a status message to a fraction, then update the bar and the status text.
+                fraction = None
+                label_fraction = progress_fraction_from_message(message)  # presumably None when the message carries no progress counter - confirm against the helper
+                if "[Full] Generating blocks for labels" in message and label_fraction is not None:
+                    fraction = 0.2 + (0.7 - 0.2) * label_fraction  # label-block phase is scaled into 0.2-0.7
+                elif (
+                    "[Full] Generating annotation count blocks for labels" in message
+                    and label_fraction is not None
+                ):
+                    fraction = 0.7 + (0.9 - 0.7) * label_fraction  # annotation-count phase is scaled into 0.7-0.9
+                elif label_fraction is not None and "Processing pkl files" in message:
+                    fraction = 0.02 + (0.12 - 0.02) * label_fraction  # pkl processing is scaled into 0.02-0.12
+                else:
+                    fraction = stage_progress.get(message, 0.05)  # Unlisted messages (including the per-run "Run i/n: ..." line) fall back to 0.05.
+                _specsheet_progress.progress(fraction)
+                _specsheet_status.info(f"Generating release spec-sheet: {message}")
+
+            generated_pdfs: list[tuple[Path, bool]] = []  # One (pdf_path, generated) pair per run, as returned by generate_specsheet_pdf.
+            for idx, run_path in enumerate(selected_specsheet_run_paths, start=1):
+                _update_specsheet_status(f"Run {idx}/{len(selected_specsheet_run_paths)}: {get_run_display_name(run_path)}")
+                if specsheet_trend_enabled and trend_metadata_payload is not None:
+                    if _selected_trend_metadata_path is not None and trend_metadata_changed:  # Persist the edited metadata to the selected file before generating.
+                        _selected_trend_metadata_path.write_text(
+                            yaml.safe_dump(trend_metadata_payload, allow_unicode=True, sort_keys=False),
+                            encoding="utf-8",
+                        )
+                    if len(selected_specsheet_release_contexts) == 1:  # With exactly one release context, also write the metadata into its role run directories.
+                        roles = selected_specsheet_release_contexts[0].get("roles", {})
+                        if isinstance(roles, dict):
+                            for role_info in roles.values():
+                                if not isinstance(role_info, dict) or not role_info.get("has_summary"):  # Skip malformed entries and roles without a summary.
+                                    continue
+                                role_run_dir = role_info.get("run_dir")
+                                if isinstance(role_run_dir, Path):
+                                    write_trend_metadata(role_run_dir, trend_metadata_payload)
+                pdf_path, generated = generate_specsheet_pdf(
+                    run_path,
+                    project_id=specsheet_project_id,
+                    version=specsheet_version,
+                    labels=specsheet_labels,
+                    topic_name=specsheet_topic_name,
+                    include_trend=specsheet_trend_enabled,
+                    trend_metadata=trend_metadata_payload,  # NOTE(review): passed even when trend mode is off, whereas _specsheet_key records None in that case - confirm intended.
+                    force=True,  # presumably bypasses any up-to-date check in generate_specsheet_pdf - confirm
+                    progress_callback=_update_specsheet_status,
+                )
+                generated_pdfs.append((pdf_path, generated))
+
+            if len(generated_pdfs) == 1:  # Single run: offer the PDF file itself.
+                download_name = generated_pdfs[0][0].name
+                download_bytes = generated_pdfs[0][0].read_bytes()
+                download_mime = "application/pdf"
+            else:  # Several runs: bundle all PDFs into one in-memory zip.
+                zip_buffer = io.BytesIO()
+                with zipfile.ZipFile(zip_buffer, "w", compression=zipfile.ZIP_DEFLATED) as zf:
+                    for pdf_path, _ in generated_pdfs:
+                        zf.write(pdf_path, arcname=f"{pdf_path.parent.parent.name}/{pdf_path.name}")  # Entry name is <grandparent dir name>/<file name>. NOTE(review): entries collide if two PDFs share both - confirm layout.
+                download_name = "specsheet_reports.zip"
+                download_bytes = zip_buffer.getvalue()
+                download_mime = "application/zip"
+
+            st.session_state["specsheet_pdf_report_bytes"] = download_bytes  # Cache the artifact together with the inputs that produced it.
+            st.session_state["specsheet_pdf_report_key"] = _specsheet_key
+            st.session_state["specsheet_pdf_report_name"] = download_name
+            st.session_state["specsheet_pdf_report_mime"] = download_mime
+            _specsheet_ready = True  # Lets the download column show the cached artifact during this same script run.
+            _specsheet_progress.progress(1.0)
+            if any(generated for _, generated in generated_pdfs):  # Wording depends on whether any run reported a newly generated PDF.
+                if len(generated_pdfs) == 1:
+                    _specsheet_status.success("Release spec-sheet PDF is ready.")
+                else:
+                    _specsheet_status.success("Release spec-sheet files are ready.")
+            else:
+                if len(generated_pdfs) == 1:
+                    _specsheet_status.success("Using the existing up-to-date release spec-sheet PDF.")
+                else:
+                    _specsheet_status.success("Using the existing up-to-date release spec-sheet files.")
+        except Exception as e:  # Drop every cached artifact field so no stale download is served from session state after a failure.
+            st.session_state.pop("specsheet_pdf_report_bytes", None)
+            st.session_state.pop("specsheet_pdf_report_key", None)
+            st.session_state.pop("specsheet_pdf_report_name", None)
+            st.session_state.pop("specsheet_pdf_report_mime", None)
+            _specsheet_status.error(f"Spec-sheet generation failed: {e}")
+with specsheet_action_col2:
+    if _specsheet_ready:
+        st.success("Release spec-sheet is ready.")
+        st.download_button(
+            "Download Release Spec-sheet",
+            data=st.session_state["specsheet_pdf_report_bytes"],
+            file_name=st.session_state.get("specsheet_pdf_report_name", "specsheet.pdf"),
+            mime=st.session_state.get("specsheet_pdf_report_mime", "application/pdf"),
+            use_container_width=True,
+        )
+    elif _all_selected_specsheet_pdfs_ready:
+        st.success("Existing release spec-sheet is ready.")
+        if len(_existing_specsheet_paths) == 1:
+            _disk_pdf_path = _existing_specsheet_paths[0]
+            st.download_button(
+                "Download Release Spec-sheet",
+                data=_disk_pdf_path.read_bytes(),
+                file_name=_disk_pdf_path.name,
+                mime="application/pdf",
+                use_container_width=True,
+            )
+        else:
+            _zip_buffer = io.BytesIO()
+            with zipfile.ZipFile(_zip_buffer, "w", compression=zipfile.ZIP_DEFLATED) as zf:
+                for pdf_path in _existing_specsheet_paths:
+                    zf.write(pdf_path, arcname=f"{pdf_path.parent.parent.name}/{pdf_path.name}")
+            st.download_button(
+                "Download Release Spec-sheets",
+                data=_zip_buffer.getvalue(),
+                file_name="specsheet_reports.zip",
+                mime="application/zip",
+                use_container_width=True,
+            )
+    else:
+        if len(selected_specsheet_run_paths) == 1:
+            _single_paths = _active_specsheet_paths[0]
diff --git a/evaluation_dashboard_app/Readme.en.md b/evaluation_dashboard_app/Readme.en.md
new file mode 100755
index 0000000..5d94cf1
--- /dev/null
+++ b/evaluation_dashboard_app/Readme.en.md
@@ -0,0 +1,372 @@
+# Evaluation Dashboard
+
+## Required Installation
+
+This dashboard and evaluation tool require the following prerequisites and Python packages.
+
+### Python packages (local development / full functionality)
+The easiest way is to install from the single `requirements.txt` at the repository root, including private dependencies.
+
+```sh
+cd evaluation_dashboard_app
+pip install -r requirements.txt
+```
+
+Example if you want to install packages manually in separate steps:
+
+```sh
+# Basic
+pip install \
+  streamlit pandas plotly duckdb numpy \
+  requests pyyaml matplotlib shapely
+
+# Download / Scenario API authentication
+pip install git+ssh://git@github.com/tier4/webauto-auth-py.git
+
+# Production task queue (when USE_TASK_QUEUE=true)
+pip install rq psycopg2-binary
+```
+
+In the **Docker image**, public dependencies are installed from [`requirements-docker.txt`](requirements-docker.txt), and private packages such as `webauto-auth` and the evaluation dependencies are added during build time using SSH secrets (see [`Dockerfile`](Dockerfile)).
+
+PDF export uses Plotly/Kaleido static image rendering, so **Chrome is also installed in the Docker image**. If you see `Kaleido requires Google Chrome to be installed` in the deployment environment, **rebuild and redeploy** with the latest image.
+
+```sh
+# Install CLI tool (if you use it for generating evaluation command lines)
+pipx install git+ssh://git@github.com/tier4/v_and_v_util.git
+```
+
+### pilot-auto / perception_eval (only needed when generating Summary / Score)
+- A pilot-auto environment with `perception_eval` available is required. See "Usage" below.
+- If importing `perception_eval` fails, generation of `Summary.csv` / `Score.csv` stops.
+
+### Configuration file
+- Input values are saved in `configs/autoware_evaluator_dl_config.json` (created / updated automatically).
+
+## Overview
+This is an evaluation dashboard built with Streamlit. It reads evaluation results under `data/` (`Summary.csv`, `Score.csv`, `.parquet`) and visualizes them across multiple pages. In addition, `pages/6_Download.py` supports bulk collection of evaluation results such as `result.txt`, automatic generation of `Summary.csv` / `Score.csv`, and searching / downloading result directories. The **TLR (Traffic Light Recognition) Analysis** page can visualize criteria matrices, vehicle state vs. signal type, important zones, and more for traffic-light recognition evaluation. To use it, you must first download scenario data from **tab 2 "Download Scenarios"** on the Download page.
+
+## Usage
+
+1. To generate summary or score files from `pages/6_Download.py` ("Generate Summary.csv / Score.csv"), you must **activate the pilot-auto (ROS 2) environment in advance** with the following command:
+   ```
+   source path_to_pilot/install/setup.sh
+   ```
+   This step is required for "Summary / Score CSV generation" in `pages/6_Download.py`.
+
+2. Start Streamlit from `evaluation_dashboard_app/`.
+   ```
+   streamlit run Overview.py
+   ```
+
+3. Choose pages and filters from the sidebar to explore the data.
+
+### Visualization quick start (recommended workflow)
+
+The recommended workflow — downloading the logs for a test, generating summaries, and reviewing the details in Overview — consists of three steps:
+
+1. **Download the target test logs from the Download page**
+2. **Generate summary / score files from "Eval Results" on the Download page**
+3. **Select that log (Run) on the Overview page and inspect the details**
+
+Below is a summary of what to do and what to watch out for in each step.
+
+#### Step 1: Download logs from the Download page
+
+- **Page**: Open **Download** (`6_Download.py`) from the sidebar.
+- **Tab**: Select **"Download Results"**.
+- **Inputs**:
+  - Enter **Project ID** and **Job ID**. Optionally specify a Suite ID if needed.
+  - For **Output Path**, specify **a folder dedicated to this test**.
+    To make it show up as a selectable "Run" in Overview, it is recommended to place one folder per test directly under `data/`.
+    Example: `./data/my_test_20250203`
+- **Download Type**:
+  - **Archives (ZIP)**: Downloads ZIP archives, extracts them, and takes data for the selected phase. Suitable for full local analysis.
+  - **Result JSON only**: Downloads only the result JSON. Lightweight and useful when you only want summary / score generation.
+- **Run**: Click "Download Results" and wait for completion.
+- **Result**: Under the specified Output Path, logs and, when needed, source files such as `result.txt` and `score.json` are stored in a directory structure based on the job / suite.
+
+![Download page settings (Download Results tab)](docs/images/download_config.png)
+
+![After download finishes](docs/images/download_result.png)
+
+#### Step 2: Generate summary analysis results in Eval Results
+
+- **Page**: Stay on the same **Download** page.
+- **Tab**: Switch to **"Eval Results (per directory)"** or **"Eval Results"**.
+- **Root directory to evaluate**:
+  - Specify **the same path used as Output Path in Step 1**.
+    Example: `./data/my_test_20250203`
+- **Options**:
+  - **Search subdirectories**: Searches subdirectories for `result.txt` / `score.json`. Usually this should be enabled.
+  - **Only generate Summary.csv and Score.csv**:
+    If each directory already contains `result.txt` or `score.json`, enabling this skips re-running `perception_eval` and generates **only `Summary.csv` and `Score.csv`** from the existing results.
+    On the first run, if `result.txt` and related outputs do not exist yet, leave this unchecked and run the full evaluation with "Run eval_result for all directories".
+- **Run**:
+  - Click either "Run eval_result for all directories" or "Generate Summary and Score CSV only".
+- **Result**: **`Summary.csv` and `Score.csv`** are generated directly under the specified root directory.
+  These files are the "summary analysis results" used by Overview and pages such as TP Summary and Criteria Based Score.
+
+![Eval Results tab (summary / score generation)](docs/images/eval_result.png)
+
+If `perception_eval` is used during Summary / Score generation, you must run `source path_to_pilot/install/setup.sh` in advance as described in "Usage".
+
+#### Step 3: Select the log in Overview and inspect the details
+
+- **Page**: Open **Overview** (`Overview.py`) from the sidebar.
+- **Selecting a Run**:
+  - Overview treats **each direct subdirectory under `data/`** as one "Run".
+  - If the Output Path in Step 1 was `./data/<test_name>`, that `<test_name>` appears in the sidebar dropdown for **"Baseline (A)"**.
+  - Choose the log (Run) you want to inspect in **Baseline (A)**.
+    If you want to compare runs, switch to **Compare Mode** and choose another Run in **Candidate (B)**.
+- **Displayed contents**:
+  - Overall metrics based on the selected Run's **Summary.csv** are shown, such as TP mean and XRMS / YRMS / XSTD / YSTD.
+  - By filtering with Perception Label / Product Label, you can inspect label-specific TP and metric breakdowns.
+  - Other pages such as TP Summary, Criteria Based Score, Detection Stats, and Bounding Box Viewer share the Run selected in Overview through `st.session_state`, so it is best to **select the Run in Overview first** and then move to the detailed pages.
+
+![Overview page (Run selection and metrics display)](docs/images/overview.png)
+
+**Key point**:
+- Whenever you add a new test, use `./data/<new_test_name>` as the Output Path in Download, then use that same path in Eval Results to generate Summary / Score. The new test will appear in the Overview Run list, and you can inspect it immediately.
+
+## Main Features
+- Select a Run on the Overview page, switch between single-run and compare mode, and display overall metrics
+- When the production task queue is enabled, track heavy jobs from the UI such as "Recent tasks"
+- TP / position / velocity statistical viewers (scatter plots and distributions)
+- Criteria-based evaluation viewer (metric distributions, averages, and box plots)
+- Detection statistics comparison viewer (for example TP / FP distance-bin comparison)
+- BEV bounding-box visualization
+- TLR (Traffic Light Recognition) evaluation analysis: criteria matrices, vehicle state vs. signal type, important zones. Requires scenario data downloaded from tab 2 of the Download page.
+- Evaluation command generation tool
+- **Docker production**: Navigate from Overview to **Deployment debug** (Postgres / Redis / RQ and optional Docker operations)
+
+## Directory Structure
+```text
+evaluation_dashboard_app/
+  Overview.py
+  pages/
+    1_TP_Summary.py … 10_Help.py, 99_Deployment_Debug.py (sidebar order follows the page numbers)
+  lib/
+  worker/            # Production: RQ tasks and worker entrypoint
+  configs/
+    autoware_evaluator_dl_config.json
+  deploy/            # Production: compose, nginx, numbered shell steps
+    docker-compose.yml
+    .env.example
+    01_SETUP_ENV.sh ... 09_RESTART_WORKER.sh
+    configs/
+      autoware_evaluator_dl_config.json   # Mounted inside the container at /app/docker_config during compose runs
+    nginx/
+  data/
+    <run_id>/
+      Summary.csv
+      Score.csv
+    *.parquet
+```
+
+## Page Guide
+
+The sidebar order follows the numbering of **`number_name.py` files directly under `pages/`**. **Deployment debug** (`99_Deployment_Debug.py`) must stay directly under `pages/` because it is registered through `st.page_link`. Outside Docker, `inject_app_page_styles` hides that sidebar item with CSS. Inside Docker, there is an explicit link from **Overview**.
+
+Many visualization pages rely on `st.session_state`, so it is best to **select the mode (single / compare) and Run in Overview first**. In compare mode, Baseline (A) and Candidate (B...) are shared across pages.
+
+### `Overview.py` (entry point)
+- Starting point for **shared filters** such as single / compare mode, Run selection, and Perception / Product labels.
+- **Shareable URL**: The same view can be reproduced using query parameters like `mode`, `run_a`, `run_b`, and so on. Some other pages follow the same pattern.
+- When running in Docker, the sidebar shows a link to **Deployment debug** (`pages/99_Deployment_Debug.py`).
+
+### `pages/1_TP_Summary.py`
+- **Prerequisite**: Data must already be loaded in Overview. **`Summary.csv` is required**. If a Run does not have it, TP Summary is unavailable, while Detection Stats / BB Viewer can still work with only parquet files and show guidance accordingly.
+- In compare mode, **deltas between runs** can be reflected in plots.
+- `TP` range, velocity outlier clipping, scatter plots (`xrms`-`yrms`, `vx`-`vy`), and distribution histograms.
+
+### `pages/2_Criteria_Based_Score.py`
+- A criteria evaluation viewer based on **`Score.csv`**. Follows the mode selected in Overview.
+- Criteria block switching, metric distributions, group averages, box plots, and scenario-level comparisons.
+- Includes UI for **Absolute gates** (sign-off by threshold pass / fail) and gate comparison across multiple Runs.
+
+### `pages/3_Detection_Stats.py`
+- Aggregates detection evaluation data using **`.parquet` + DuckDB**. Supports filters, hierarchical views, scenario breakdown, and **comparison across multiple Runs** when Overview is in compare mode.
+- Distance-bin comparison by status such as TP / FP and color schemes for perception diffs (improved / worsened).
+
+### `pages/4_Bounding_Box_Viewer.py`
+- **Prerequisite**: A Run must already be selected in Overview.
+- Displays bounding boxes on a **BEV** from `.parquet`. Supports filtering by t4dataset, topic, label, visibility, and more. In compare mode, it can handle multiple Runs.
+
+### `pages/5_Tools.py`
+- Evaluation command generation tool
+- Extract Job ID / Suite ID from Report / Suite URLs
+
+### `pages/6_Download.py`
+- Main integration point with the evaluator. The **tabs** are organized as follows:
+
+  | Tab | Contents |
+  |------|------|
+  | **Download Results** | Retrieve job results such as archive ZIPs or Result JSON. Output Path is restricted under the data root. |
+  | **Download Scenarios** | Download scenario data. Required by **TLR Analysis**. |
+  | **View Downloads** | Review downloaded jobs and scenarios. |
+  | **Eval Results** | Run evaluation or generate **Summary.csv / Score.csv** from `result.txt` / `score.json` under a root directory. |
+
+- When **`USE_TASK_QUEUE=true`** (Redis + Worker + Postgres), heavy work is queued to workers, and you can track status from the UI through **Recent tasks** and related sections.
+
+### `pages/7_Data_Management.py`
+- Displays the list of Runs under the data root, including size, update time, and whether Summary / Score / Parquet files exist.
+- Download outputs as a **ZIP**, copy **share links** for Overview, and **delete** Runs to manage storage in a multi-user server environment.
+
+### `pages/8_Parquet_Debug.py`
+- For development and troubleshooting. Reads **`.parquet` / `.pkl` / `result.json`** from file paths and shows schemas, keys, criteria state, and optional quick plots.
+- Useful for debugging pipeline outputs inside the dashboard.
+
+### `pages/9_TLR_Analysis.py`
+- **TLR (Traffic Light Recognition)** evaluation: criteria matrices, vehicle state vs. signal type, important zones, and more. Supports single / compare mode and **shareable URLs** such as `mode`, `path_a`, `path_b`.
+- **Prerequisite**: Download scenario data from **Download Scenarios** on the **Download** page and select the TLR result directory as a Run.
+
+### `pages/10_Help.py`
+- Displays the repository **README inside the app** so setup instructions, workflows, and documentation can be read directly in the browser.
+- Since **Mermaid diagrams** in Markdown are not rendered by default in Streamlit, this page renders them with JavaScript (Mermaid.js).
+
+### `pages/99_Deployment_Debug.py` (Docker only)
+- Available only when Streamlit is running **inside a container**. With local `streamlit run`, it stops at a guidance message.
+- Because `st.page_link` requires this page to be a `*.py` file **directly under `pages/`**, it cannot be moved elsewhere; instead, its automatically generated sidebar item is **hidden with CSS outside Docker**. In Docker, you can also open it from the **Overview** sidebar via "Deployment debug".
+- Lets you inspect the state of Postgres / Redis / RQ, task counts, and, depending on configuration, the host Docker container list, recent logs, and restricted `docker exec`.
+- In production, mounting the **Docker socket grants strong privileges**, so check the authentication, VPN, and `EVAL_DEPLOYMENT_DEBUG_*` settings in [docs/PRODUCTION_DEPLOYMENT.md](docs/PRODUCTION_DEPLOYMENT.md).
+
+## Data Formats (high level)
+- `Summary.csv`: `id`, `TP`, `xstd`, `xrms`, `ystd`, `yrms`, `vx`, `vy`, `perception_label`, `product_label`
+- `Score.csv`: Criteria evaluation metric blocks (`Scenario`, `Option`, `GT_OBJ`, then `criteria0..n`)
+- `.parquet`: Fields used for detection statistics / bounding-box viewing, such as `x`, `y`, `length`, `width`, `yaw`, `label`, `source`, `status`
+
+# Docker Usage Guide
+
+The image is **ROS-based**, so build it with a `ROS_DISTRO` that matches the host ROS environment (see the build arguments below).
+
+### Build Steps
+
+Because private repositories (`tier4/webauto-auth-py`, `tier4/v_and_v_util`) are used, you must provide a **GitHub SSH key** during build time.
+Use `~/.ssh/id_rsa` directly. No ssh-agent is required.
+
+```sh
+cd evaluation_dashboard_app
+
+# Recommended: add --no-cache if you want to rebuild with the latest dependencies every time.
+# If ROS is Humble (the default, so --build-arg ROS_DISTRO can be omitted)
+docker build --no-cache --secret id=ssh,src=$HOME/.ssh/id_rsa -t evaluation-dashboard .
+
+# If you want to switch ROS_DISTRO to Iron / Jazzy etc.
+docker build --build-arg ROS_DISTRO=iron --secret id=ssh,src=$HOME/.ssh/id_rsa -t evaluation-dashboard .
+```
+
+### Production deployment
+
+For multi-user / production use, the recommended setup is **Nginx -> Streamlit -> Redis (task queue) -> Worker -> Postgres**. Heavy jobs such as downloads, evaluation, Summary / Score CSV generation, and parquet generation are executed by workers instead of the UI process, and task state is stored in Postgres.
+
+**Target Architecture:**
+
+```mermaid
+flowchart LR
+  subgraph clients [Clients]
+    Browser[Browser]
+  end
+  subgraph edge [Edge]
+    Nginx[Nginx]
+  end
+  subgraph app [App Tier]
+    S1[Streamlit 1]
+    S2[Streamlit 2]
+  end
+  subgraph infra [Infrastructure]
+    Redis[Redis]
+    Postgres[Postgres]
+  end
+  subgraph workers [Workers]
+    W1[Worker 1]
+    W2[Worker N]
+  end
+  Browser --> Nginx
+  Nginx --> S1
+  Nginx --> S2
+  S1 --> Redis
+  S2 --> Redis
+  S1 --> Postgres
+  S2 --> Postgres
+  Redis --> W1
+  Redis --> W2
+  W1 --> Postgres
+  W2 --> Postgres
+  W1 --> DataRoot[Data root]
+  W2 --> DataRoot
+```
+
+- **Build**: As described above in "Build Steps", run `docker build ... -t evaluation-dashboard .` in `evaluation_dashboard_app/`. The compose services `streamlit1` (default), optional `streamlit2` (`--profile ha`), and `worker` all use this image.
+- **Recommended flow (`deploy/` numbered scripts)**: Move into `deploy/` and run the scripts in order. All of them use `docker compose --env-file .env`.
+
+  | Script | Description |
+  |-----------|------|
+  | `01_SETUP_ENV.sh` | Create `.env` from `.env.example` if it does not exist. **You still edit it manually.** |
+  | `02_BUILD.sh` | Build the image. You can pass arguments such as `--no-cache`. |
+  | `03_INIT_DB.sh` | **First time only**: after Postgres starts, run `init_db` to create task tables. |
+  | `04_START.sh` | Start the stack. The default worker count comes from `EVAL_COMPOSE_SCALE_WORKER` in `.env`; passing, for example, `./04_START.sh --scale worker=3` overrides it. |
+  | `05_STOP.sh` | Stop the stack. |
+  | `06_STATUS.sh` | Check service status. |
+  | `07_LOGS.sh` | Run `docker compose logs -f`. Without arguments it shows all services; for example `./07_LOGS.sh worker`. |
+  | `08_REBUILD_AND_START.sh` | Build and then start the stack, same startup behavior as `04_START.sh`. |
+  | `09_RESTART_WORKER.sh` | Restart workers so code changes are reflected on the worker side. |
+
+- **Manual setup is also possible**: `cd deploy && cp .env.example .env` -> edit `.env` -> `docker compose --env-file .env up -d`. For first-time setup only, run `docker compose --env-file .env run --rm init_db` (equivalent to `03_INIT_DB.sh`).
+- **Access**: In production compose, **Nginx listens on port 80**, and Streamlit is accessed through the proxy (see `docker-compose.yml` / `nginx/nginx.conf`). Since the source code and `lib/` are mounted, **Streamlit reloads easily when files change**, but **workers must be restarted after Python code changes**.
+- **If the UI keeps loading forever**: Streamlit communicates with the browser over **WebSocket**. Suggested checks: (1) do a **hard reload** including cache reset or reopen in another tab, (2) by default Nginx points only to **one Streamlit app** (`streamlit1`), and a second instance should be enabled only when needed with `docker compose --profile ha up -d` plus upstream changes in `nginx.conf`, (3) set **`STREAMLIT_SERVER_COOKIE_SECRET`** in `deploy/.env.example`, (4) set `enableWebsocketCompression = false` in `.streamlit/config.toml`, and in Nginx use `proxy_buffering off` together with suitable `proxy_*_timeout` values, and (5) check logs with `docker compose logs streamlit1 nginx`.
+- **502 Bad Gateway**: This happens when Nginx **cannot reach Streamlit** because the process exited, was killed by OOM, or stayed blocked for too long. Check `docker compose logs streamlit1` and host **`dmesg`** for OOM messages. Heavy pages can consume significant memory, so the **default single-instance setup** and the single upstream in `deploy/nginx/nginx.conf` are recommended.
+- **Troubleshooting Detection Stats freezes / 502**: Set **`EVAL_DETECTION_STATS_DEBUG=1`** in `.env` so it is passed into the compose `streamlit1` service, then restart. The **Detection Stats debug** expander at the bottom of the page and the stderr of **`docker compose logs streamlit1`** will show section boundaries, `getrusage` memory values, and elapsed time before / after DuckDB calls.
+- **If a subpage says "load in Overview" even though Overview was already opened**: Session state is stored **in memory per replica**. Overview also syncs `mode` / `run_a` / `run_b`... into the URL, so when those query parameters remain in the address bar, subpages such as Detection Stats can **rebuild `run_a` into `runA`** via `lib/overview_url_hydrate.py`. Open **Overview once**, confirm the address bar contains `run_a=`, then move to the subpage, or reopen from the **Overview share link**.
+- **Avoid duplicate config management**: During compose runs, `deploy/configs/autoware_evaluator_dl_config.json` is mounted inside the container as `EVAL_DASHBOARD_CONFIG` (`/app/docker_config/...`). This is a separate file from the host `configs/` version, so edit the one under `deploy/configs/` for Docker-specific settings.
+- For detailed settings and environment variables, see [docs/PRODUCTION_DEPLOYMENT.md](docs/PRODUCTION_DEPLOYMENT.md).
+
+### Startup and data mount (single container)
+
+Always mount the `data/` directory so data is persisted and visible.
+
+```sh
+docker run -p 8501:8501 \
+  -v "$(pwd)/data:/app/data" \
+  -v ~/.webauto:/root/.webauto \
+  evaluation-dashboard
+```
+
+### Example: run in background (`-d`)
+
+If you want to start the container in detached mode, add `-d` and optionally set `--name`. If you want to synchronize the entire `/app` tree, including code and notebooks, with the host, use the following form.
+
+```sh
+docker run -d --name evaluation-dashboard \
+  -p 8501:8501 \
+  -v "$(pwd):/app" \
+  -v ~/.webauto:/root/.webauto \
+  evaluation-dashboard
+```
+
+### Multi-user deployment
+
+If multiple people access the same server for downloads, evaluation, result review, sharing, and data management, refer to the following points.
+
+- **Data root**: You can set the evaluation data root with environment variable `EVAL_DASHBOARD_DATA_ROOT` (default is `data`). Example: `-e EVAL_DASHBOARD_DATA_ROOT=/var/eval_dashboard/data`
+- **Path restriction**: The Download Output Path and Eval Root directory are restricted under this data root, and path traversal is rejected.
+- **Data Management page**: Lets you view the Run list, show sizes, delete Runs, and copy share links. You can remove unnecessary Runs to manage disk usage.
+- **Sharing results**: By adding `?mode=...&run_a=...&run_b=...` to the Overview URL, you can share the same Run view. Links can be copied from Data Management or "Share this view" in Overview.
+- See [docs/MULTI_USER_DEPLOYMENT.md](docs/MULTI_USER_DEPLOYMENT.md) for more details.
+
+### Debugging and shell access
+
+If you want shell access inside a running container, use one of the following methods.
+
+**1. Enter bash by container ID**
+```sh
+docker ps         # check the [CONTAINER ID]
+docker exec -it [CONTAINER ID] /bin/bash
+```
+
+**2. Start directly with bash as the entrypoint**
+```sh
+docker run -it --entrypoint bash \
+  -v "$(pwd)/data:/app/data" \
+  evaluation-dashboard
+```
diff --git a/evaluation_dashboard_app/Readme.md b/evaluation_dashboard_app/Readme.md
old mode 100644
new mode 100755
index add4149..02c2aa9
--- a/evaluation_dashboard_app/Readme.md
+++ b/evaluation_dashboard_app/Readme.md
@@ -29,6 +29,8 @@ pip install rq psycopg2-binary
 
 **Docker イメージ**では、公開依存は [`requirements-docker.txt`](requirements-docker.txt) で入り、ビルド時の SSH シークレットで webauto-auth・評価系のプライベートパッケージを追加インストールします（[`Dockerfile`](Dockerfile) 参照）。
 
+PDF エクスポートでは Plotly/Kaleido の静的画像化を使うため、**Docker イメージ内に Chrome もインストール**されます。デプロイ環境で `Kaleido requires Google Chrome to be installed` が出た場合は、最新のイメージへ **再 build / 再 deploy** してください。
+
 ```sh
 # Install CLI tool (評価実行コマンド生成で利用する場合)
 pipx install git+ssh://git@github.com/tier4/v_and_v_util.git
@@ -295,7 +297,7 @@ flowchart LR
   W2 --> DataRoot
 ```
 
-- **ビルド**: 上記「ビルド手順」のとおり `evaluation_dashboard_app/` で `docker build ... -t evaluation-dashboard .`（compose の `streamlit` / `worker` はこのイメージを参照します）。
+- **ビルド**: 上記「ビルド手順」のとおり `evaluation_dashboard_app/` で `docker build ... -t evaluation-dashboard .`（compose の `streamlit1`（既定）・任意の `streamlit2`（`--profile ha`）・`worker` はこのイメージを参照します）。
 - **推奨フロー（`deploy/` の番号付きスクリプト）**: `deploy/` に移動して順に実行します（すべて `docker compose --env-file .env` を使います）。
 
   | スクリプト | 内容 |
@@ -303,15 +305,19 @@ flowchart LR
   | `01_SETUP_ENV.sh` | `.env` が無ければ `.env.example` から作成（**編集は手動**） |
   | `02_BUILD.sh` | イメージビルド（引数で `--no-cache` など可） |
   | `03_INIT_DB.sh` | **初回のみ**: Postgres 起動後に `init_db` でタスク用テーブル作成 |
-  | `04_START.sh` | スタック起動（例: `./04_START.sh --scale worker=3`） |
+  | `04_START.sh` | スタック起動（デフォルト worker 数は `.env` の `EVAL_COMPOSE_SCALE_WORKER`、例: `./04_START.sh --scale worker=3` で上書き可） |
   | `05_STOP.sh` | 停止 |
   | `06_STATUS.sh` | 状態確認 |
   | `07_LOGS.sh` | `docker compose logs -f`（省略時は全サービス、例: `./07_LOGS.sh worker`） |
-  | `08_REBUILD_AND_START.sh` | ビルド後に `up -d` |
+  | `08_REBUILD_AND_START.sh` | ビルド後に `04_START.sh` と同じ起動（worker 既定本数あり） |
   | `09_RESTART_WORKER.sh` | ワーカー再起動（コード変更を worker に反映） |
 
 - **手動でも同じことは可能**: `cd deploy && cp .env.example .env` → `.env` を編集 → `docker compose --env-file .env up -d`。初回のみ `docker compose --env-file .env run --rm init_db`（`03_INIT_DB.sh` と同等）。
 - **アクセス**: 本番 compose では **Nginx がポート 80**、Streamlit はプロキシ経由（`docker-compose.yml` / `nginx/nginx.conf` 参照）。ソースや `lib/` はマウントされているため **Streamlit はファイル変更でリロード**しやすい一方、**ワーカーは Python 変更後に再起動**が必要です。
+- **UI がずっとロード中になるとき**: Streamlit はブラウザと **WebSocket** でつながります。対処の目安: (1) **ハードリロード**（キャッシュ削除込み）や別タブで開き直す。(2) **既定は Streamlit アプリ 1 台**（`streamlit1`）のみを Nginx の向き先にしています。2 台目が必要な場合のみ `docker compose --profile ha up -d` と `nginx.conf` の upstream 追記を参照。(3) compose で **`STREAMLIT_SERVER_COOKIE_SECRET`** を設定する（`deploy/.env.example` 参照）。(4) **`.streamlit/config.toml`** の `enableWebsocketCompression = false` と Nginx の **`proxy_buffering off`** / `proxy_*_timeout` が設定されていることを確認する。(5) ログを確認する: `docker compose logs streamlit1 nginx`。
+- **502 Bad Gateway**: Nginx が **Streamlit に繋がらない**ときに出ます（プロセス落ち・OOM・長時間ブロックで切断など）。`docker compose logs streamlit1` とホストの **`dmesg`（OOM）** を確認。重いページはメモリを食うため、**既定の 1 台構成**と `deploy/nginx/nginx.conf` の単一 upstream を推奨します。
+- **Detection Stats のフリーズ / 502 切り分け**: `.env` に **`EVAL_DETECTION_STATS_DEBUG=1`**（compose の `streamlit1` に渡る）を入れて再起動。ページ下部の **Detection Stats debug** 展開と **`docker compose logs streamlit1`** の stderr に、セクション境界・`getrusage` メモリ・DuckDB 前後の経過時間が出ます。
+- **サブページで「Overview で読み込み」と出るのに Overview は済んでいるとき**: セッション状態は **レプリカごとのメモリ**にあります。Overview は URL に `mode` / `run_a` / `run_b`…を同期するため、**同じ URL のクエリが付いたまま**ならサブページ（Detection Stats など）が **`run_a` から `runA` を再構築**します（`lib/overview_url_hydrate.py`）。一度 **Overview を開いて**アドレスバーに `run_a=` があることを確認してからサブページへ進むか、または **Overview の共有リンク**から開き直してください。
 - **設定の二重管理を避ける**: compose 実行時は `deploy/configs/autoware_evaluator_dl_config.json` がコンテナ内 `EVAL_DASHBOARD_CONFIG`（`/app/docker_config/...`）としてマウントされます。ホストの `configs/` とは別ファイルなので、Docker 用に変えたい値はこちらを編集します。
 - 詳細・環境変数一覧は [docs/PRODUCTION_DEPLOYMENT.md](docs/PRODUCTION_DEPLOYMENT.md) を参照してください。
 
diff --git a/evaluation_dashboard_app/catalogs.json b/evaluation_dashboard_app/catalogs.json
new file mode 100755
index 0000000..4270cc5
--- /dev/null
+++ b/evaluation_dashboard_app/catalogs.json
@@ -0,0 +1,38 @@
+[
+  {
+    "display_name": "Build Test Catalog",
+    "catalog_id": "bd0569ec-9826-44ac-8780-45b4cea624e6",
+    "description": "Try this catalog for testing build integration",
+    "integration_id": "900d2096-a112-48f0-a65e-27e122aad86a"
+  },
+  {
+    "display_name": "Performance Test",
+    "catalog_id": "e36d75b9-6c3a-4970-9b9b-5cd13f7a9da3",
+    "description": "To calculate perception metrics and create metrics report",
+    "integration_id": "96ad8fba-0228-4c2b-9166-07d4de1a0760"
+  },
+  { 
+    "display_name": "Old performance test",
+    "catalog_id": "e2efe01d-e0c6-4d49-8223-817ff5d73204",
+    "description": "Run perception metrics test we have done previously",
+    "integration_id": "6126e86f-615f-4b84-9643-91b88db606bd"
+  },
+  {
+    "display_name": "Devops Test",
+    "catalog_id": "ab0f8498-cc1b-4726-836f-e18e8bcb3200",
+    "description": "Edge case for devops integration",
+    "integration_id": "295cff78-9bc9-4d60-b7aa-f95be6ff96a4"
+  },
+  {
+    "display_name": "Usecase Performance Catalog",
+    "catalog_id": "09039022-ec91-41bf-9e93-fdefccdfc9bc",
+    "description": "[WIP] Run evaluation based on planning scene catalog.",
+    "integration_id": "51f89d37-5c65-4449-9add-8971d0a79a7a"
+  },
+  {
+    "display_name": "L4 regression test",
+    "catalog_id": "14b1d54b-5c9f-4cbf-a7e1-0eebceb1d30f",
+    "description": "[WARN] This is a regression test for L4, please do not use it for other purposes",
+    "integration_id": "c5f58b3c-8974-4f33-a8fa-e1f443320cfd"
+  }
+]
diff --git a/evaluation_dashboard_app/deploy/.env b/evaluation_dashboard_app/deploy/.env
index 1bfde14..75a1ec2 100644
--- a/evaluation_dashboard_app/deploy/.env
+++ b/evaluation_dashboard_app/deploy/.env
@@ -17,6 +17,10 @@ DATABASE_URL=postgresql://eval_user:eval_pass@postgres:5432/eval_dashboard
 REDIS_URL=redis://redis:6379/0
 RQ_QUEUE=default
 
+# T4 visualizer server base URL used by Bounding Box Viewer / T4 pages
+# For Docker-on-Linux, host.docker.internal is mapped via docker-compose extra_hosts
+T4_VISUALIZER_BASE_URL=http://10.0.6.148:8000
+
 # Optional: per-user task visibility (company auth / WebAutoAuth)
 # Header name set by auth proxy with current user id (e.g. X-Forwarded-User)
 # AUTH_USER_HEADER=X-Forwarded-User
@@ -26,4 +30,5 @@ RQ_QUEUE=default
 # NGINX_HTTPS=1
 
 EVAL_DEPLOYMENT_DEBUG_COMPOSE_PROJECT=deploy
-EVAL_DEPLOYMENT_DEBUG_EXEC=1
\ No newline at end of file
+EVAL_DEPLOYMENT_DEBUG_EXEC=1
+EVAL_COMPOSE_SCALE_WORKER=3
\ No newline at end of file
diff --git a/evaluation_dashboard_app/deploy/.env.example b/evaluation_dashboard_app/deploy/.env.example
index 1add32b..50d5a65 100644
--- a/evaluation_dashboard_app/deploy/.env.example
+++ b/evaluation_dashboard_app/deploy/.env.example
@@ -17,6 +17,19 @@ DATABASE_URL=postgresql://eval_user:eval_pass@postgres:5432/eval_dashboard
 REDIS_URL=redis://redis:6379/0
 RQ_QUEUE=default
 
+# T4 visualizer server base URL used by Bounding Box Viewer / T4 pages
+# In Docker, set to a host-reachable endpoint (compose maps host.docker.internal)
+T4_VISUALIZER_BASE_URL=http://host.docker.internal:8000
+
+# Docker Compose: default worker replica count (04_START.sh / 08_REBUILD_AND_START.sh). Streamlit defaults to streamlit1 only; optional second app server: compose --profile ha (see docker-compose.yml + nginx.conf).
+EVAL_COMPOSE_SCALE_WORKER=2
+
+# Same secret on both Streamlit containers (session cookies / multi-replica). Compose sets a dev default; override in production: openssl rand -hex 32
+# STREAMLIT_SERVER_COOKIE_SECRET=
+
+# Detection Stats page: stderr timing logs + debug expander (docker compose logs streamlit1)
+# EVAL_DETECTION_STATS_DEBUG=1
+
 # RQ: max job runtime before the worker kills the job (seconds). Default 7 days if unset.
 # RQ_JOB_TIMEOUT_SEC=604800
 # Optional: longer timeout for build_parquet only (defaults to RQ_JOB_TIMEOUT_SEC if unset)
diff --git a/evaluation_dashboard_app/deploy/.streamlit/config.toml b/evaluation_dashboard_app/deploy/.streamlit/config.toml
new file mode 100644
index 0000000..14d8726
--- /dev/null
+++ b/evaluation_dashboard_app/deploy/.streamlit/config.toml
@@ -0,0 +1,13 @@
+# Streamlit project config (used for local `streamlit run` and Docker WORKDIR=/app).
+# See https://docs.streamlit.io/develop/api-reference/configuration/config.toml
+
+[server]
+# Local default: open browser when running outside Docker
+headless = false
+
+# Behind nginx or other proxies, per-message WebSocket compression can break or stall
+# some setups (see Streamlit troubleshooting: "App is not loading when running remotely").
+enableWebsocketCompression = false
+
+# cookieSecret: MUST be identical on every Streamlit replica behind a load balancer.
+# Set via environment in Docker: STREAMLIT_SERVER_COOKIE_SECRET (see deploy/docker-compose.yml).
diff --git a/evaluation_dashboard_app/deploy/04_START.sh b/evaluation_dashboard_app/deploy/04_START.sh
index e087e35..451c209 100755
--- a/evaluation_dashboard_app/deploy/04_START.sh
+++ b/evaluation_dashboard_app/deploy/04_START.sh
@@ -1,6 +1,6 @@
 #!/usr/bin/env bash
-# 04 — Start the full stack, or if it is already running: up -d (apply compose/scale) then restart all services.
-# Extra args: e.g. ./04_START.sh --scale worker=3
+# 04 — Start or update the full stack with docker compose up -d.
+# Default: 2 worker replicas (EVAL_COMPOSE_SCALE_WORKER in .env). Override: ./04_START.sh --scale worker=1 (last --scale wins).
 set -euo pipefail
 DEPLOY_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 cd "$DEPLOY_DIR"
@@ -9,12 +9,16 @@ if [[ ! -f .env ]]; then
   exit 1
 fi
 
+set -a
+# shellcheck disable=SC1091
+source .env
+set +a
+WORKER_SCALE="${EVAL_COMPOSE_SCALE_WORKER:-2}"
+
 dc() { docker compose --env-file .env "$@"; }
 
-if [[ -n "$(dc ps -q --status running 2>/dev/null || true)" ]]; then
-  echo "Stack already running — updating with up -d, then restarting all services."
-  dc up -d "$@"
-  dc restart
-else
-  dc up -d "$@"
-fi
+dc up -d --scale "worker=${WORKER_SCALE}" "$@"
+
+# Nginx resolves Docker service names at startup. Recreate it after Streamlit is
+# up so it remounts the current nginx.conf and cannot keep a stale container IP.
+dc up -d --no-deps --force-recreate nginx
diff --git a/evaluation_dashboard_app/deploy/08_REBUILD_AND_START.sh b/evaluation_dashboard_app/deploy/08_REBUILD_AND_START.sh
index b216686..2763b55 100755
--- a/evaluation_dashboard_app/deploy/08_REBUILD_AND_START.sh
+++ b/evaluation_dashboard_app/deploy/08_REBUILD_AND_START.sh
@@ -8,4 +8,4 @@ if [[ ! -f .env ]]; then
   exit 1
 fi
 docker compose --env-file .env build "$@"
-docker compose --env-file .env up -d
+exec "$DEPLOY_DIR/04_START.sh"
diff --git a/evaluation_dashboard_app/deploy/docker-compose.yml b/evaluation_dashboard_app/deploy/docker-compose.yml
index 8fd4b87..54fee77 100644
--- a/evaluation_dashboard_app/deploy/docker-compose.yml
+++ b/evaluation_dashboard_app/deploy/docker-compose.yml
@@ -1,8 +1,10 @@
-# Production-style stack: Nginx -> Streamlit, Redis, Worker, Postgres.
+# Production-style stack: Nginx -> Streamlit (default: one app replica) + Workers (default scale via EVAL_COMPOSE_SCALE_WORKER in .env / 04_START.sh).
+# Second Streamlit: optional HA profile — `docker compose --profile ha up -d` and add an upstream block (streamlit1 + streamlit2, ip_hash) in deploy/nginx/nginx.conf.
 # Helper scripts (run from deploy/): 01_SETUP_ENV.sh 02_BUILD.sh 03_INIT_DB.sh 04_START.sh 05_STOP.sh
 #   06_STATUS.sh 07_LOGS.sh 08_REBUILD_AND_START.sh 09_RESTART_WORKER.sh
 # Run from deploy/: docker compose --env-file .env up -d
-# Scale workers: docker-compose up -d --scale worker=3  (default 1 worker)
+# Plain `up -d` uses one worker unless you pass --scale worker=N; 04_START.sh defaults to EVAL_COMPOSE_SCALE_WORKER (2).
+# More Streamlit boxes: duplicate x-streamlit-app block as streamlit3, add server to nginx upstream.
 # Build image from repo root: docker build -t evaluation-dashboard . (see Readme)
 #
 # Data is bind-mounted to the host so you can access it directly:
@@ -11,7 +13,7 @@
 #   - ${HOME}/.webauto -> Download/Scenario API credentials (streamlit + worker)
 #
 # App source is mounted so you can edit Python code without rebuilding the image.
-# Streamlit will reload on file changes. Restart the worker to pick up changes: docker compose restart worker
+# Streamlit will reload on file changes. Restart workers: docker compose restart worker
 #
 # Deployment debug (pages/99_Deployment_Debug.py; nav hidden outside Docker via CSS; sidebar link on Overview in Docker): Streamlit mounts the host
 # Docker socket and sets EVAL_DEPLOYMENT_DEBUG_DOCKER=1. Anyone who can use the dashboard
@@ -19,6 +21,66 @@
 # networks. Set EVAL_DEPLOYMENT_DEBUG_COMPOSE_PROJECT in .env (see .env.example).
 # EVAL_DEPLOYMENT_DEBUG_EXEC=1 in .env enables one-shot shell (docker exec) from the UI.
 
+x-streamlit-app: &streamlit-app
+  build:
+    context: ..
+    dockerfile: Dockerfile
+    secrets:
+      - ssh
+  image: evaluation-dashboard
+  command: ["/app/docker-entrypoint.sh"]
+  environment:
+    - TZ=Asia/Tokyo
+    # Same value on streamlit1 + streamlit2 so session cookies validate behind nginx (see .streamlit/config.toml).
+    # Override in .env for production (e.g. openssl rand -hex 32).
+    - STREAMLIT_SERVER_COOKIE_SECRET=${STREAMLIT_SERVER_COOKIE_SECRET:-evaluationdashboard-streamlit-cookie-secret-change-in-production}
+    # Verbose stderr logs + timing expander on Detection Stats page (see lib/detection_stats_debug.py)
+    - EVAL_DETECTION_STATS_DEBUG=${EVAL_DETECTION_STATS_DEBUG:-0}
+    - EVAL_DASHBOARD_DATA_ROOT=${EVAL_DASHBOARD_DATA_ROOT:-/app/data}
+    - EVAL_DASHBOARD_CONFIG=/app/docker_config/autoware_evaluator_dl_config.json
+    - USE_TASK_QUEUE=${USE_TASK_QUEUE:-true}
+    - DATABASE_URL=${DATABASE_URL}
+    - REDIS_URL=${REDIS_URL:-redis://redis:6379/0}
+    - RQ_QUEUE=${RQ_QUEUE:-default}
+    - EVAL_DEPLOYMENT_DEBUG_DOCKER=1
+    - EVAL_DEPLOYMENT_DEBUG_COMPOSE_PROJECT=${EVAL_DEPLOYMENT_DEBUG_COMPOSE_PROJECT:-}
+    # One-shot shell in selected container (docker exec). Default off; set to 1 in .env only when needed.
+    - EVAL_DEPLOYMENT_DEBUG_EXEC=${EVAL_DEPLOYMENT_DEBUG_EXEC:-0}
+  volumes:
+    - ../data:/app/data
+    - ${HOME}/.webauto:/root/.webauto
+    # Docker-only config (separate from your local configs)
+    - ./configs:/app/docker_config
+    # Mount app source so code changes apply without rebuild (Streamlit auto-reloads)
+    - ../Overview.py:/app/Overview.py
+    - ../docker-entrypoint.sh:/app/docker-entrypoint.sh
+    - ../catalogs.json:/app/catalogs.json
+    - ../pages:/app/pages
+    - ../Readme.md:/app/Readme.md
+    - ../Readme.en.md:/app/Readme.en.md
+    - ../lib:/app/lib
+    - ../worker:/app/worker
+    - ../configs:/app/configs
+    - ../static:/app/static
+    - ../.streamlit:/app/.streamlit
+    - /var/run/docker.sock:/var/run/docker.sock
+  extra_hosts:
+    - "host.docker.internal:host-gateway"
+  env_file:
+    - .env
+  depends_on:
+    redis:
+      condition: service_started
+    postgres:
+      condition: service_healthy
+  healthcheck:
+    test: ["CMD-SHELL", "curl -fsS http://localhost:8501/_stcore/health >/dev/null || curl -fsS http://localhost:8501/healthz >/dev/null"]
+    interval: 5s
+    timeout: 3s
+    retries: 30
+    start_period: 20s
+  restart: unless-stopped
+
 services:
   nginx:
     image: nginx:alpine
@@ -27,55 +89,27 @@ services:
     volumes:
       - ./nginx/nginx.conf:/etc/nginx/nginx.conf:ro
     depends_on:
-      - streamlit
-    restart: unless-stopped
-
-  streamlit:
-    build:
-      context: ..
-      dockerfile: Dockerfile
-      secrets:
-        - ssh
-    image: evaluation-dashboard
-    command: ["/app/docker-entrypoint.sh"]
-    environment:
-      - TZ=Asia/Tokyo
-      - EVAL_DASHBOARD_DATA_ROOT=${EVAL_DASHBOARD_DATA_ROOT:-/app/data}
-      - EVAL_DASHBOARD_CONFIG=/app/docker_config/autoware_evaluator_dl_config.json
-      - USE_TASK_QUEUE=${USE_TASK_QUEUE:-true}
-      - DATABASE_URL=${DATABASE_URL}
-      - REDIS_URL=${REDIS_URL:-redis://redis:6379/0}
-      - RQ_QUEUE=${RQ_QUEUE:-default}
-      - EVAL_DEPLOYMENT_DEBUG_DOCKER=1
-      - EVAL_DEPLOYMENT_DEBUG_COMPOSE_PROJECT=${EVAL_DEPLOYMENT_DEBUG_COMPOSE_PROJECT:-}
-      # One-shot shell in selected container (docker exec). Default off; set to 1 in .env only when needed.
-      - EVAL_DEPLOYMENT_DEBUG_EXEC=${EVAL_DEPLOYMENT_DEBUG_EXEC:-0}
-    volumes:
-      - ../data:/app/data
-      - ${HOME}/.webauto:/root/.webauto
-      # Docker-only config (separate from your local configs)
-      - ./configs:/app/docker_config
-      # Mount app source so code changes apply without rebuild (Streamlit auto-reloads)
-      - ../Overview.py:/app/Overview.py
-      - ../pages:/app/pages
-      - ../Readme.md:/app/Readme.md
-      - ../lib:/app/lib
-      - ../worker:/app/worker
-      - ../configs:/app/configs
-      - /var/run/docker.sock:/var/run/docker.sock
-    env_file:
-      - .env
-    depends_on:
-      redis:
-        condition: service_started
-      postgres:
+      streamlit1:
         condition: service_healthy
     restart: unless-stopped
 
+  streamlit1:
+    <<: *streamlit-app
+
+  streamlit2:
+    <<: *streamlit-app
+    profiles:
+      - ha
+
   redis:
     image: redis:7-alpine
     restart: unless-stopped
-
+    healthcheck:
+      test: ["CMD", "redis-cli", "ping"]
+      interval: 5s
+      timeout: 3s
+      retries: 5
+      
   worker:
     build:
       context: ..
@@ -98,11 +132,15 @@ services:
       - ./configs:/app/docker_config
       # Mount app source so code changes apply without rebuild (restart worker to pick up: docker compose restart worker)
       - ../Overview.py:/app/Overview.py
+      - ../catalogs.json:/app/catalogs.json
       - ../pages:/app/pages
       - ../Readme.md:/app/Readme.md
+      - ../Readme.en.md:/app/Readme.en.md
       - ../lib:/app/lib
       - ../worker:/app/worker
       - ../configs:/app/configs
+    extra_hosts:
+      - "host.docker.internal:host-gateway"
     env_file:
       - .env
     depends_on:
@@ -144,4 +182,4 @@ services:
 
 secrets:
   ssh:
-    file: ${HOME}/.ssh/id_rsa
\ No newline at end of file
+    file: ${HOME}/.ssh/id_rsa
diff --git a/evaluation_dashboard_app/deploy/nginx/nginx.conf b/evaluation_dashboard_app/deploy/nginx/nginx.conf
index a4766dd..de69355 100644
--- a/evaluation_dashboard_app/deploy/nginx/nginx.conf
+++ b/evaluation_dashboard_app/deploy/nginx/nginx.conf
@@ -1,32 +1,40 @@
 # Nginx: reverse proxy to Streamlit with WebSocket support.
-# For multiple Streamlit replicas, add more "server streamlit:8501" lines in upstream.
-
+#
+# Default upstream is streamlit1 only. A second replica (streamlit2) is optional in docker-compose
+# (profile "ha"); if you add it, define an upstream block listing both servers (with ip_hash for sticky sessions) and proxy_pass to it.
+# Pointing nginx at a dead/crashed upstream yields 502 — single replica reduces RAM pressure and failure modes.
 events {
-    worker_connections 1024;
+    worker_connections 2048;
 }
 
 http {
-    upstream streamlit {
-        server streamlit:8501;
-        # Add more servers for load balancing:
-        # server streamlit2:8501;
-        # server streamlit3:8501;
-    }
+    # Docker's embedded DNS. Resolve Streamlit at request time so nginx does not keep
+    # a stale container IP after `docker compose up -d` recreates streamlit1.
+    resolver 127.0.0.11 valid=10s ipv6=off;
 
     server {
         listen 80;
         server_name _;
 
+        client_max_body_size 200m;
+
         location / {
-            proxy_pass http://streamlit;
+            set $streamlit_upstream streamlit1:8501;
+            proxy_pass http://$streamlit_upstream;
             proxy_http_version 1.1;
+            proxy_buffering off;
             proxy_set_header Host $host;
             proxy_set_header X-Real-IP $remote_addr;
             proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
             proxy_set_header X-Forwarded-Proto $scheme;
             proxy_set_header Upgrade $http_upgrade;
             proxy_set_header Connection "upgrade";
+            proxy_connect_timeout 60s;
+            proxy_send_timeout 86400;
             proxy_read_timeout 86400;
+            # Large Streamlit responses / occasional upstream quirks
+            proxy_buffer_size 128k;
+            proxy_buffers 8 256k;
         }
     }
 }
diff --git a/evaluation_dashboard_app/docker-entrypoint.sh b/evaluation_dashboard_app/docker-entrypoint.sh
old mode 100644
new mode 100755
index c37a1b5..a3c26ac
--- a/evaluation_dashboard_app/docker-entrypoint.sh
+++ b/evaluation_dashboard_app/docker-entrypoint.sh
@@ -5,4 +5,4 @@ if [[ -n "${ROS_DISTRO}" && -f "/opt/ros/${ROS_DISTRO}/setup.bash" ]]; then
   source "/opt/ros/${ROS_DISTRO}/setup.bash"
 fi
 
-exec streamlit run Overview.py --server.address=0.0.0.0 --server.port=8501 "$@"
+exec streamlit run Overview.py --server.address=0.0.0.0 --server.port=8501 --server.headless=true --server.enableStaticServing=true "$@"
diff --git a/evaluation_dashboard_app/docs/PRODUCTION_DEPLOYMENT.md b/evaluation_dashboard_app/docs/PRODUCTION_DEPLOYMENT.md
index 3e84af6..4bc22fc 100644
--- a/evaluation_dashboard_app/docs/PRODUCTION_DEPLOYMENT.md
+++ b/evaluation_dashboard_app/docs/PRODUCTION_DEPLOYMENT.md
@@ -72,6 +72,7 @@ Heavy operations (download results, download scenarios, run eval_result, generat
 | `EVAL_DEPLOYMENT_DEBUG_DOCKER` | Set to `1` in [`deploy/docker-compose.yml`](deploy/docker-compose.yml) for Streamlit; enables the **Docker** tab when the host socket is mounted. Override in `.env` only if you change compose. | `1` in compose |
 | `EVAL_DEPLOYMENT_DEBUG_COMPOSE_PROJECT` | Compose project name (`docker compose ls`) to filter containers by `com.docker.compose.project`. Strongly recommended when the host runs other stacks. | (empty) |
 | `EVAL_DEPLOYMENT_DEBUG_EXEC` | When `1`/`true`, the Deployment debug **Docker** tab shows **Run command** (`sh -c` via `docker exec`). Default `0` in compose — enable in `.env` only briefly on trusted networks. | `0` |
+| `EVAL_COMPOSE_SCALE_WORKER` | Default number of `worker` replicas when using [`deploy/04_START.sh`](deploy/04_START.sh) / [`08_REBUILD_AND_START.sh`](deploy/08_REBUILD_AND_START.sh). | `2` |
 
 ## Build
 
@@ -113,26 +114,23 @@ docker compose build --no-cache
    docker compose up -d
    ```
 
-   To run multiple workers, use `--scale worker=N` (e.g. 3 workers):
+   The stack defaults to **one Streamlit** container (`streamlit1`) behind Nginx — a second one (`streamlit2`) is optional via `docker compose --profile ha up -d` — and **two workers** (`EVAL_COMPOSE_SCALE_WORKER=2` in `.env`, applied by [`04_START.sh`](deploy/04_START.sh)). Override worker count with `--scale worker=N` (last flag wins) or change `EVAL_COMPOSE_SCALE_WORKER`.
 
    ```sh
-   docker-compose up -d --scale worker=3
+   docker compose up -d --scale worker=3
    ```
 
-   Default is one worker. All worker replicas share the same RQ queue.
+   All worker replicas share the same RQ queue.
 
 4. **Access the app**
 
    - Via Nginx: **http://localhost** (port 80)
-   - Streamlit directly (if you expose it): port 8501 on the `streamlit` service (not exposed by default when using Nginx)
+   - Streamlit directly (if you expose ports in compose): 8501 on `streamlit1` / `streamlit2` (not exposed by default when using Nginx)
 
 ## Scaling
 
-- **Workers**: Use Docker Compose `--scale` to run more worker containers. From the `deploy/` directory:
-  - **Default (1 worker):** `docker-compose up -d`
-  - **N workers:** `docker-compose up -d --scale worker=N`  
-    Example: `docker-compose up -d --scale worker=3` runs three workers; all consume from the same RQ queue.
-- **Streamlit replicas**: In `deploy/docker-compose.yml`, duplicate the `streamlit` service (e.g. `streamlit2`) and add `server streamlit2:8501;` to `deploy/nginx/nginx.conf` in the `upstream streamlit` block.
+- **Workers**: Default replica count is `EVAL_COMPOSE_SCALE_WORKER` (see `.env.example`; [`04_START.sh`](deploy/04_START.sh) passes `--scale worker=…`). From the `deploy/` directory you can also run `docker compose up -d --scale worker=N` (e.g. three workers); all consume from the same RQ queue.
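+- **Streamlit replicas**: By default, Nginx proxies to a single Streamlit container (`streamlit1`). An optional second replica (`streamlit2`) is started with `docker compose --profile ha up -d`; to route traffic to it, define an `upstream` block with `ip_hash` (session stickiness) listing `streamlit1:8501` and `streamlit2:8501` in [`deploy/nginx/nginx.conf`](deploy/nginx/nginx.conf) and point `proxy_pass` at it. To add more, reuse the `x-streamlit-app` anchor in [`deploy/docker-compose.yml`](deploy/docker-compose.yml) for a new service (e.g. `streamlit3`), add `depends_on` for Nginx, and add `server streamlit3:8501;` (etc.) to that upstream.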
 
 ## TLS (HTTPS)
 
@@ -151,18 +149,18 @@ To serve over HTTPS, configure Nginx with SSL certificates (e.g. Let's Encrypt)
 | "Failed to enqueue task" | `REDIS_URL` and `DATABASE_URL` are set; Redis and Postgres containers are running; `USE_TASK_QUEUE=true`. |
 | Tasks stay "pending" | Worker container is running; same `REDIS_URL` and `RQ_QUEUE` as Streamlit; worker logs for errors. |
 | Postgres connection refused | Postgres is healthy (`docker-compose ps`); `DATABASE_URL` uses hostname `postgres` and correct port (5432). |
-| Nginx 502 Bad Gateway | Streamlit container is up and listening on 8501; Nginx `upstream` points to `streamlit:8501`. |
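+| Nginx 502 Bad Gateway | The Streamlit container (`streamlit1`) is up, healthy, and listening on 8501; `proxy_pass` in `deploy/nginx/nginx.conf` targets `streamlit1:8501` (plus `streamlit2:8501` only if the `ha` profile is running and you added it to an upstream). |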
 
 ## Deployment debug page (Docker socket)
 
 The Streamlit page **Deployment debug** (`pages/99_Deployment_Debug.py` — required at top level so `st.page_link` works; default sidebar entry is hidden outside Docker via CSS; **Overview** adds a sidebar link when running in Docker) shows redacted environment variables, Postgres/Redis/RQ checks, task counts, and Docker container status and log tails.
 
-- [`deploy/docker-compose.yml`](deploy/docker-compose.yml) mounts `/var/run/docker.sock` into the `streamlit` service and sets `EVAL_DEPLOYMENT_DEBUG_DOCKER=1`. After `docker compose up -d`, restart or recreate Streamlit if you change compose or env.
+- [`deploy/docker-compose.yml`](deploy/docker-compose.yml) mounts `/var/run/docker.sock` into each Streamlit service (`streamlit1`, `streamlit2`) and sets `EVAL_DEPLOYMENT_DEBUG_DOCKER=1`. After `docker compose up -d`, restart or recreate those services if you change compose or env.
 - Set `EVAL_DEPLOYMENT_DEBUG_COMPOSE_PROJECT` in `.env` to your Compose project name (from `docker compose ls`) so the UI lists only this stack’s containers. If it is unset, the page lists every container visible to the daemon and shows a warning.
-- Rebuild the image after adding the `docker` PyPI package to `requirements-docker.txt` (or `docker compose build streamlit`).
+- Rebuild the image after adding the `docker` PyPI package to `requirements-docker.txt` (or `docker compose build streamlit1`).
 - **Exec**: set `EVAL_DEPLOYMENT_DEBUG_EXEC=1` in `.env` and recreate Streamlit to enable one-shot `sh -c` commands in the selected container (same power as `docker exec`). Leave at `0` when you only need logs.
 
-**Risk**: any user who can open the app with socket access can read logs for containers matched by the filter. With `EVAL_DEPLOYMENT_DEBUG_EXEC=1`, they can also run shell commands inside those containers. Restrict access with VPN, SSO/auth proxy, or remove the socket mount and debug env from the `streamlit` service in compose if that risk is unacceptable.
+**Risk**: any user who can open the app with socket access can read logs for containers matched by the filter. With `EVAL_DEPLOYMENT_DEBUG_EXEC=1`, they can also run shell commands inside those containers. Restrict access with VPN, SSO/auth proxy, or remove the socket mount and debug env from the Streamlit services in compose if that risk is unacceptable.
 
 ## Data on the host (bind mounts)
 
@@ -198,7 +196,7 @@ Rebuild the image only when you change dependencies (e.g. `requirements-docker.t
 
 ```
 deploy/
-  docker-compose.yml                  # full stack; streamlit includes Docker socket for Deployment debug
+  docker-compose.yml                  # full stack; streamlit1/streamlit2 + Docker socket for Deployment debug
   .env.example
   nginx/
     nginx.conf
diff --git a/evaluation_dashboard_app/docs/guide/data_reports.html b/evaluation_dashboard_app/docs/guide/data_reports.html
new file mode 100644
index 0000000..a9a5d14
--- /dev/null
+++ b/evaluation_dashboard_app/docs/guide/data_reports.html
@@ -0,0 +1,245 @@
+<!doctype html>
+<html lang="en">
+<head>
+  <meta charset="utf-8" />
+  <meta name="viewport" content="width=device-width, initial-scale=1" />
+  <title>Evaluation Dashboard Data and Reports</title>
+  <link rel="stylesheet" href="styles.css" />
+</head>
+<body>
+  <header class="hero compact">
+    <div class="shell">
+      <div class="eyebrow"><span class="signal"></span>Data and Reports</div>
+      <h1>Artifacts</h1>
+      <p class="lead">
+        The dashboard is driven by files. Understanding which file powers which page makes the app much easier to use and debug.
+      </p>
+    </div>
+  </header>
+  <nav>
+    <div class="shell nav-inner">
+      <a href="index.html">Home</a>
+      <a href="getting_started.html">How to Use</a>
+      <a href="pages.html">Pages</a>
+      <a class="active" href="data_reports.html">Data & Reports</a>
+      <a href="specsheet.html">Specsheet</a>
+      <a href="deployment.html">Deployment</a>
+      <a href="visual_systems.html">Diagrams</a>
+    </div>
+  </nav>
+
+  <main>
+    <section class="reveal">
+      <div class="shell">
+        <div class="section-head">
+          <div class="kicker">Run Model</div>
+          <h2>A run is a folder under the data root.</h2>
+          <p class="lead">
+            The default data root is <code>data/</code>. In production it can be changed with
+            <code>EVAL_DASHBOARD_DATA_ROOT</code>. Download and Eval paths are restricted to locations under this root.
+          </p>
+        </div>
+        <div class="split">
+          <div class="tree">
+            <div>data/</div>
+            <div>  my_test_20250203/</div>
+            <div>    Summary.csv</div>
+            <div>    Score.csv</div>
+            <div>    result.txt / score.json / logs...</div>
+            <div>    current.parquet / future.parquet</div>
+            <div>    resources/metadata.yaml + summary.json</div>
+            <div>    specsheet/specsheet.pdf</div>
+          </div>
+          <div class="card">
+            <h3>Why one folder per test?</h3>
+            <p>
+              It keeps Overview selection simple, makes Data Management safer, and lets users share links using stable run names.
+              If output is scattered across arbitrary folders, users cannot easily know what to select or delete.
+            </p>
+          </div>
+        </div>
+      </div>
+    </section>
+
+    <section class="reveal">
+      <div class="shell">
+        <div class="section-head">
+          <div class="kicker">Core Files</div>
+          <h2>Which artifact powers which page?</h2>
+        </div>
+        <table class="table">
+          <thead><tr><th>Artifact</th><th>Created By</th><th>Used By</th><th>Meaning</th></tr></thead>
+          <tbody>
+            <tr>
+              <td><code>Summary.csv</code></td>
+              <td>Download -> Eval Results</td>
+              <td>Overview, TP Summary</td>
+              <td>Object-level summary metrics such as TP, x/y RMS, x/y STD, velocity, perception labels, and product labels.</td>
+            </tr>
+            <tr>
+              <td><code>Score.csv</code></td>
+              <td>Download -> Eval Results</td>
+              <td>Criteria Based Score</td>
+              <td>Criteria block metrics including scenario, optional dataset ID, option, GT object, criteria label, NM, TP/TN, ADD, AIL, UIL, PFN/PFP, Practical Pass Rate, thresholds, and counts.</td>
+            </tr>
+            <tr>
+              <td><code>.parquet</code></td>
+              <td>Download/eval/parquet build workflows</td>
+              <td>Detection Stats, Bounding Box Viewer, Prediction Evaluation, Debug</td>
+              <td>Structured frame/object rows: position, dimensions, yaw, label, status, source, scenario metadata, and prediction metrics.</td>
+            </tr>
+            <tr>
+              <td><code>metadata.yaml</code> + <code>summary.json</code></td>
+              <td>Specsheet/trend generation or analyzer output</td>
+              <td>Trend Insights, Specsheet trend export</td>
+              <td>Release identity and trend summary payloads. The shape of <code>summary.json</code> determines whether it is classified as full, usecase, or devops.</td>
+            </tr>
+          </tbody>
+        </table>
+      </div>
+    </section>
+
+    <section class="reveal">
+      <div class="shell">
+        <div class="section-head">
+          <div class="kicker">Score.csv Structure</div>
+          <h2>Score.csv is the Criteria page source.</h2>
+          <p class="lead">
+            Each row describes one scenario result and then repeats the criteria metric block for each
+            available criteria range.
+          </p>
+        </div>
+        <table class="table">
+          <thead><tr><th>Part</th><th>Fields</th><th>How the dashboard uses it</th></tr></thead>
+          <tbody>
+            <tr>
+              <td>Row identity</td>
+              <td><code>Scenario</code>, optional <code>Dataset</code></td>
+              <td>Used for scenario filters, scenario leaderboards, compare joins, gates, and PDF tables.</td>
+            </tr>
+            <tr>
+              <td>Scenario context</td>
+              <td><code>Option</code>, <code>GT_OBJ</code></td>
+              <td>Used for grouping charts and understanding the matching policy/object class behind the row.</td>
+            </tr>
+            <tr>
+              <td>Criteria block</td>
+              <td><code>Distance</code>, <code>NM</code>, <code>TP/TN</code>, <code>ADD</code>, <code>AIL</code>, <code>UIL</code>, <code>PFN/PFP</code>, <code>UUID Num</code>, <code>Practical Pass Rate</code>, <code>MAX_DIST_THRESH</code>, <code>OBJ_CNTS</code></td>
+              <td>Used by Criteria Based Score for distributions, deltas, absolute gates, and scenario-level ranking.</td>
+            </tr>
+          </tbody>
+        </table>
+        <div class="callout">
+          <strong>Tip:</strong> if two rows share a scenario name but have different dataset IDs, the app keeps
+          them separate in Criteria comparisons and gate summaries.
+        </div>
+      </div>
+    </section>
+
+    <section class="reveal">
+      <div class="shell">
+        <div class="section-head">
+          <div class="kicker">Pass Metrics</div>
+          <h2>TP is not the same metric as Practical Pass Rate.</h2>
+        </div>
+        <table class="table">
+          <thead><tr><th>Dashboard label</th><th>Source</th><th>Calculation / meaning</th><th>Used by</th></tr></thead>
+          <tbody>
+            <tr>
+              <td><code>TP</code>, TP mean</td>
+              <td><code>Summary.csv</code></td>
+              <td>Comes from <code>summarize_ratio()</code> as TP rate. AIL and ADD are not added to this metric.</td>
+              <td>Overview, TP Summary, dashboard PDF TP sections.</td>
+            </tr>
+            <tr>
+              <td><code>pass_rate</code>, Pass rate mean</td>
+              <td><code>Score.csv</code> <code>Practical Pass Rate</code></td>
+              <td><code>(TP/TN + ADD + AIL) / NM * 100</code>. AIL and ADD are pass-side outcomes for this practical score.</td>
+              <td>Criteria Based Score, absolute pass/fail gates, Criteria PDF sections.</td>
+            </tr>
+          </tbody>
+        </table>
+        <div class="callout warn">
+          <strong>Important:</strong> when reviewing pass/fail gates, read “pass rate” as
+          <strong>Practical Pass Rate</strong>, not the traditional TP rate from <code>Summary.csv</code>.
+        </div>
+      </div>
+    </section>
+
+    <section class="reveal">
+      <div class="shell">
+        <div class="section-head">
+          <div class="kicker">Trend Data</div>
+          <h2>Trend summaries are classified by JSON shape.</h2>
+          <p class="lead">
+            Trend Insights scans the data root for <code>metadata.yaml</code> files that have a sibling <code>summary.json</code>.
+            It then classifies the summary and groups related jobs into releases.
+          </p>
+        </div>
+        <div class="grid cols-3">
+          <div class="card">
+            <h3>Full performance</h3>
+            <p>Summary has <code>blocks</code> containing the header <code>全数データセット評価</code>. Used for mAP, precision, recall, error, and prediction trends.</p>
+          </div>
+          <div class="card">
+            <h3>Usecase</h3>
+            <p>Summary has <code>blocks</code> containing <code>ユースケース評価</code>. It participates in release grouping and inventory.</p>
+          </div>
+          <div class="card">
+            <h3>DevOps pass-rate</h3>
+            <p>Summary is a nested dictionary without <code>blocks</code>, with category results containing <code>passed</code> and <code>total</code>.</p>
+          </div>
+        </div>
+      </div>
+    </section>
+
+    <section class="reveal">
+      <div class="shell">
+        <div class="section-head">
+          <div class="kicker">Report Outputs</div>
+          <h2>Reports are optional outputs, not the main app path.</h2>
+          <p class="lead">
+            Users can explore directly in Streamlit, then export when they need a portable artifact for review.
+          </p>
+        </div>
+        <div class="grid cols-3">
+          <div class="card">
+            <h3>Dashboard PDF</h3>
+            <p>Generated from the current Overview selection and filters. Best for summarizing the dashboard state as a curated report.</p>
+          </div>
+          <div class="card">
+            <h3>Release Specsheet PDF</h3>
+            <p>Advanced release-oriented report generated through <code>perception_catalog_analyzer</code>. It can include trend pages when trend metadata is enabled.</p>
+            <div class="actions"><a class="button" href="specsheet.html">Open specsheet details</a></div>
+          </div>
+          <div class="card">
+            <h3>ZIP outputs</h3>
+            <p>Data Management can package outputs for download, useful when moving run artifacts out of a shared server.</p>
+          </div>
+        </div>
+      </div>
+    </section>
+
+    <section class="reveal">
+      <div class="shell">
+        <div class="section-head">
+          <div class="kicker">Debugging by Artifact</div>
+          <h2>When a page is empty, first check the file it needs.</h2>
+        </div>
+        <table class="table">
+          <thead><tr><th>Symptom</th><th>Likely missing</th><th>Fix</th></tr></thead>
+          <tbody>
+            <tr><td>Overview summary is sparse</td><td><code>Summary.csv</code></td><td>Generate Summary.csv from Download -> Eval Results.</td></tr>
+            <tr><td>Criteria page has no rows</td><td><code>Score.csv</code></td><td>Generate Score.csv from result files or score JSON.</td></tr>
+            <tr><td>Detection/BEV pages cannot load</td><td>Parquet files</td><td>Build or place parquet artifacts under the expected data root/run path.</td></tr>
+            <tr><td>Trend Insights has no releases</td><td><code>metadata.yaml</code> + <code>summary.json</code></td><td>Generate or copy trend-compatible release outputs under data root.</td></tr>
+            <tr><td>Specsheet trend section says no data</td><td>Trend rows or PNG plots</td><td>Check trend classification and generated plot files in <code>specsheet/</code>.</td></tr>
+          </tbody>
+        </table>
+      </div>
+    </section>
+  </main>
+  <footer class="footer"><div class="shell"><a href="index.html">Back to guide home</a></div></footer>
+  <script src="guide.js"></script>
+</body>
+</html>
diff --git a/evaluation_dashboard_app/docs/guide/deployment.html b/evaluation_dashboard_app/docs/guide/deployment.html
new file mode 100644
index 0000000..19b9c23
--- /dev/null
+++ b/evaluation_dashboard_app/docs/guide/deployment.html
@@ -0,0 +1,190 @@
+<!doctype html>
+<html lang="en">
+<head>
+  <meta charset="utf-8" />
+  <meta name="viewport" content="width=device-width, initial-scale=1" />
+  <title>Evaluation Dashboard Deployment Guide</title>
+  <link rel="stylesheet" href="styles.css" />
+</head>
+<body>
+  <header class="hero compact">
+    <div class="shell">
+      <div class="eyebrow"><span class="signal"></span>Deployment and Operations</div>
+      <h1>Deploy</h1>
+      <p class="lead">
+        Run locally for development. Use the production compose stack when multiple people need a shared server,
+        background workers, task history, and operational visibility.
+      </p>
+    </div>
+  </header>
+  <nav>
+    <div class="shell nav-inner">
+      <a href="index.html">Home</a>
+      <a href="getting_started.html">How to Use</a>
+      <a href="pages.html">Pages</a>
+      <a href="data_reports.html">Data & Reports</a>
+      <a href="specsheet.html">Specsheet</a>
+      <a class="active" href="deployment.html">Deployment</a>
+      <a href="visual_systems.html">Diagrams</a>
+    </div>
+  </nav>
+
+  <main>
+    <section class="reveal">
+      <div class="shell">
+        <div class="section-head">
+          <div class="kicker">Local Development</div>
+          <h2>Fastest way to run the app.</h2>
+          <p class="lead">
+            Local mode is best for development, one-person analysis, and debugging. Heavy jobs run in the Streamlit process
+            unless task queue environment variables are enabled.
+          </p>
+        </div>
+        <div class="split">
+          <div class="code">
+            <div class="code-title">local start</div>
+            <pre>cd evaluation_dashboard_app
+pip install -r requirements.txt
+streamlit run Overview.py</pre>
+          </div>
+          <div class="card">
+            <h3>Local prerequisites</h3>
+            <ul>
+              <li>Python packages from <code>requirements.txt</code>.</li>
+              <li>pilot-auto / <code>perception_eval</code> environment only when generating Summary/Score.</li>
+              <li>Evaluator API credentials when using Download pages.</li>
+              <li>Chrome availability for some static image/PDF export flows.</li>
+            </ul>
+          </div>
+        </div>
+      </div>
+    </section>
+
+    <section class="reveal">
+      <div class="shell">
+        <div class="section-head">
+          <div class="kicker">Single Docker Container</div>
+          <h2>Portable app container for simple usage.</h2>
+        </div>
+        <div class="grid cols-2">
+          <div class="card">
+            <h3>Build image</h3>
+            <p>Private dependencies may require passing a GitHub SSH key as a Docker build secret.</p>
+            <div class="code" style="margin-top:12px">
+              <div class="code-title">build</div>
+              <pre>docker build --no-cache \
+  --secret id=ssh,src=$HOME/.ssh/id_rsa \
+  -t evaluation-dashboard .</pre>
+            </div>
+          </div>
+          <div class="card">
+            <h3>Run with persistent data</h3>
+            <p>Always mount the data directory so runs survive container restarts.</p>
+            <div class="code" style="margin-top:12px">
+              <div class="code-title">run</div>
+              <pre>docker run -p 8501:8501 \
+  -v "$(pwd)/data:/app/data" \
+  -v ~/.webauto:/root/.webauto \
+  evaluation-dashboard</pre>
+            </div>
+          </div>
+        </div>
+      </div>
+    </section>
+
+    <section class="reveal">
+      <div class="shell">
+        <div class="section-head">
+          <div class="kicker">Production Stack</div>
+          <h2>Nginx to Streamlit to Redis workers to Postgres.</h2>
+          <p class="lead">
+            In production, heavy operations should not block Streamlit. The app enqueues jobs to Redis, workers execute them,
+            and Postgres stores task state for Recent Tasks and operational visibility.
+          </p>
+        </div>
+        <div class="flow">
+          <div class="step"><strong>Browser</strong><span>Team users open the shared app.</span></div>
+          <div class="step"><strong>Nginx</strong><span>Reverse proxy, optional TLS/load balancing.</span></div>
+          <div class="step"><strong>Streamlit</strong><span>UI, filters, enqueue requests, task status.</span></div>
+          <div class="step"><strong>Redis + Worker</strong><span>RQ queue and heavy background jobs.</span></div>
+          <div class="step"><strong>Postgres + Data</strong><span>Task metadata and shared run artifacts.</span></div>
+        </div>
+        <div class="code" style="margin-top:24px">
+          <div class="code-title">recommended numbered scripts</div>
+          <pre>cd deploy
+./01_SETUP_ENV.sh       # create .env if missing, then edit manually
+./02_BUILD.sh --no-cache
+./03_INIT_DB.sh         # first time only
+./04_START.sh           # start nginx, streamlit, redis, postgres, workers
+./06_STATUS.sh          # inspect service status
+./07_LOGS.sh worker     # tail logs for a service</pre>
+        </div>
+      </div>
+    </section>
+
+    <section class="reveal">
+      <div class="shell">
+        <div class="section-head">
+          <div class="kicker">Environment Variables</div>
+          <h2>The settings that matter most.</h2>
+        </div>
+        <table class="table">
+          <thead><tr><th>Variable</th><th>Purpose</th></tr></thead>
+          <tbody>
+            <tr><td><code>EVAL_DASHBOARD_DATA_ROOT</code></td><td>Shared evaluation data root. Streamlit and workers must see the same path.</td></tr>
+            <tr><td><code>USE_TASK_QUEUE</code></td><td>Enable Redis/RQ worker mode. Recommended for production.</td></tr>
+            <tr><td><code>DATABASE_URL</code></td><td>Postgres task metadata connection string.</td></tr>
+            <tr><td><code>REDIS_URL</code></td><td>Redis queue connection string.</td></tr>
+            <tr><td><code>RQ_JOB_TIMEOUT_SEC</code></td><td>Long timeout for downloads/eval jobs; default is intentionally much longer than RQ's built-in default.</td></tr>
+            <tr><td><code>EVAL_DASHBOARD_CONFIG</code></td><td>Docker-specific JSON config path mounted from <code>deploy/configs/</code>.</td></tr>
+            <tr><td><code>EVAL_DEPLOYMENT_DEBUG_EXEC</code></td><td>Enables Docker exec from Deployment Debug. Keep off unless briefly needed on a trusted network.</td></tr>
+          </tbody>
+        </table>
+      </div>
+    </section>
+
+    <section class="reveal">
+      <div class="shell">
+        <div class="section-head">
+          <div class="kicker">Multi-User Operation</div>
+          <h2>A shared server, not per-user accounts.</h2>
+          <p class="lead">
+            The app is designed as a local-team tool. Everyone who can access the server can see shared data and use server-side API credentials.
+          </p>
+        </div>
+        <div class="grid cols-4">
+          <div class="card"><h3>Shared data</h3><p>All run folders under the data root are visible to all users.</p></div>
+          <div class="card"><h3>Path safety</h3><p>Download and eval paths are resolved under the data root; traversal is rejected.</p></div>
+          <div class="card"><h3>Shared credentials</h3><p>Download API credentials are mounted server-side, not entered by each user.</p></div>
+          <div class="card"><h3>Share links</h3><p>Users share Overview URLs with <code>mode</code>, <code>run_a</code>, and <code>run_b</code>.</p></div>
+        </div>
+        <div class="callout warn" style="margin-top:20px">
+          <strong>Access control lives outside the app:</strong> use VPN, firewall, SSO proxy, or network controls if the server should only be reachable by your team.
+        </div>
+      </div>
+    </section>
+
+    <section class="reveal">
+      <div class="shell">
+        <div class="section-head">
+          <div class="kicker">Operations Checklist</div>
+          <h2>What to check when production feels unhealthy.</h2>
+        </div>
+        <table class="table">
+          <thead><tr><th>Issue</th><th>Check</th></tr></thead>
+          <tbody>
+            <tr><td>Failed to enqueue task</td><td>Confirm Redis, Postgres, <code>USE_TASK_QUEUE=true</code>, and matching URLs.</td></tr>
+            <tr><td>Tasks stay pending</td><td>Confirm the worker is running and uses the same <code>RQ_QUEUE</code>, and that the worker logs show no import/config errors.</td></tr>
+            <tr><td>Nginx 502</td><td>Confirm Streamlit is listening on 8501 and has not been OOM-killed, and that the Nginx upstream matches the service names.</td></tr>
+            <tr><td>Subpage forgets Overview state</td><td>Use Overview share link with <code>run_a</code> query params, especially with multiple Streamlit replicas.</td></tr>
+            <tr><td>Detection Stats freezes</td><td>Set <code>EVAL_DETECTION_STATS_DEBUG=1</code> and inspect section timing/memory output.</td></tr>
+            <tr><td>PDF Chrome/Kaleido error</td><td>Rebuild the image so Chrome is installed in the Docker environment.</td></tr>
+          </tbody>
+        </table>
+      </div>
+    </section>
+  </main>
+  <footer class="footer"><div class="shell"><a href="index.html">Back to guide home</a></div></footer>
+  <script src="guide.js"></script>
+</body>
+</html>
diff --git a/evaluation_dashboard_app/docs/guide/getting_started.html b/evaluation_dashboard_app/docs/guide/getting_started.html
new file mode 100644
index 0000000..4940f26
--- /dev/null
+++ b/evaluation_dashboard_app/docs/guide/getting_started.html
@@ -0,0 +1,206 @@
+<!doctype html>
+<html lang="en">
+<head>
+  <meta charset="utf-8" />
+  <meta name="viewport" content="width=device-width, initial-scale=1" />
+  <title>How to Use the Evaluation Dashboard</title>
+  <link rel="stylesheet" href="styles.css" />
+</head>
+<body>
+  <header class="hero compact">
+    <div class="shell">
+      <div class="eyebrow"><span class="signal"></span>User Workflow</div>
+      <h1>How to Use</h1>
+      <p class="lead">
+        This is the practical path for users: get evaluator data, generate artifacts, view a run,
+        compare candidate results, and share the exact dashboard state with teammates.
+      </p>
+    </div>
+  </header>
+  <nav>
+    <div class="shell nav-inner">
+      <a href="index.html">Home</a>
+      <a class="active" href="getting_started.html">How to Use</a>
+      <a href="pages.html">Pages</a>
+      <a href="data_reports.html">Data & Reports</a>
+      <a href="specsheet.html">Specsheet</a>
+      <a href="deployment.html">Deployment</a>
+      <a href="visual_systems.html">Diagrams</a>
+    </div>
+  </nav>
+
+  <main>
+    <section id="download" class="reveal">
+      <div class="shell">
+        <div class="section-head">
+          <div class="kicker">Workflow 1</div>
+          <h2>Download evaluator results into a run folder.</h2>
+          <p class="lead">
+            A run is normally one direct subdirectory under <code>data/</code>. Use one folder per test so
+            it is easy to select in Overview and safe to delete later.
+          </p>
+        </div>
+        <div class="flow">
+          <div class="step"><strong>Open Workflow or Download</strong><span>Use <strong>Evaluator Workflow</strong> for the guided path, or <code>pages/6_Download.py</code> for manual tabs.</span></div>
+          <div class="step"><strong>Select Download Results</strong><span>Enter Project ID, Job ID, and optional Suite ID.</span></div>
+          <div class="step"><strong>Choose Output Path</strong><span>Recommended: <code>data/&lt;test_name&gt;</code>.</span></div>
+          <div class="step"><strong>Pick Download Type</strong><span>Archives for full local analysis, Result JSON only for lightweight summary generation.</span></div>
+          <div class="step"><strong>Run Download</strong><span>Wait for completion or watch Recent Tasks when queue mode is enabled.</span></div>
+        </div>
+        <div class="split" style="margin-top:24px">
+          <div class="media">
+            <img src="../images/download_config.png" alt="Download Results configuration" />
+            <div class="caption">Use a dedicated output folder under the data root. That folder becomes the run you select later.</div>
+          </div>
+          <div class="card">
+            <h3>Decision: Archives or Result JSON only?</h3>
+            <ul>
+              <li><strong>Archives (ZIP):</strong> best for complete local investigation, eval_result, parquet generation, and visual inspection.</li>
+              <li><strong>Result JSON only:</strong> faster and lighter. Good when you mainly need downloaded result JSON, not full local analysis.</li>
+              <li><strong>Scenario downloads:</strong> use the Download Scenarios tab when TLR Analysis needs scenario data.</li>
+            </ul>
+          </div>
+        </div>
+      </div>
+    </section>
+
+    <section id="generate" class="reveal">
+      <div class="shell">
+        <div class="section-head">
+          <div class="kicker">Workflow 2</div>
+          <h2>Generate Summary.csv and Score.csv.</h2>
+          <p class="lead">
+            Most analysis pages need generated CSV artifacts. Stay on Download, switch to Eval Results,
+            and point the root directory to the same folder you used as the download output path.
+          </p>
+        </div>
+        <div class="grid cols-3">
+          <div class="card">
+            <div class="number">1</div>
+            <h3>Root directory to evaluate</h3>
+            <p>Use the same path, for example <code>data/my_test_20250203</code>. This keeps generated artifacts next to the run.</p>
+          </div>
+          <div class="card">
+            <div class="number">2</div>
+            <h3>Search subdirectories</h3>
+            <p>Usually enable this. It lets the app find <code>result.txt</code> or <code>score.json</code> in job/suite subfolders.</p>
+          </div>
+          <div class="card">
+            <div class="number">3</div>
+            <h3>Choose generation mode</h3>
+            <p>If results already exist, generate only Summary/Score. If not, run full eval_result generation.</p>
+          </div>
+        </div>
+        <div class="callout" style="margin-top:20px">
+          <strong>Score.csv identity:</strong> Criteria pages identify rows by scenario. When a
+          <code>Dataset</code> field is available, scenario and dataset are treated together so repeated
+          scenario names from different datasets stay separate.
+        </div>
+        <div class="split" style="margin-top:24px">
+          <div class="media">
+            <img src="../images/eval_result.png" alt="Eval Results screen" />
+            <div class="caption">Eval Results produces the CSVs consumed by Overview, TP Summary, and Criteria pages.</div>
+          </div>
+          <div class="callout warn">
+            <strong>Environment note:</strong> when generation uses <code>perception_eval</code>, activate the pilot-auto ROS environment first:
+            <div class="code" style="margin-top:12px">
+              <div class="code-title">before running generation</div>
+              <pre>source path_to_pilot/install/setup.sh</pre>
+            </div>
+          </div>
+        </div>
+      </div>
+    </section>
+
+    <section id="view" class="reveal">
+      <div class="shell">
+        <div class="section-head">
+          <div class="kicker">Workflow 3</div>
+          <h2>Select the run in Overview and explore.</h2>
+          <p class="lead">
+            Overview is the state hub. Many pages use the run selection and compare mode from Overview,
+            so users should start there before opening detail pages.
+          </p>
+        </div>
+        <div class="split">
+          <div class="card">
+            <h3>Single-run review</h3>
+            <ol>
+              <li>Open <strong>Overview</strong>.</li>
+              <li>Select <strong>Single Mode</strong>.</li>
+              <li>Choose your run as <strong>Baseline (A)</strong>.</li>
+              <li>Apply Perception Label or Product Label filters if needed.</li>
+              <li>Move to TP Summary, Criteria, Detection Stats, Bounding Box Viewer, or Prediction Evaluation.</li>
+            </ol>
+          </div>
+          <div class="media">
+            <img src="../images/overview.png" alt="Overview screen" />
+            <div class="caption">Overview gives the first read: summary metrics, filters, report export, and links to specialized pages.</div>
+          </div>
+        </div>
+      </div>
+    </section>
+
+    <section id="compare" class="reveal">
+      <div class="shell">
+        <div class="section-head">
+          <div class="kicker">Workflow 4</div>
+          <h2>Compare baseline A against candidate B.</h2>
+          <p class="lead">
+            Compare mode lets users answer the product question: did this candidate improve, regress,
+            or change behavior in a specific slice?
+          </p>
+        </div>
+        <div class="grid cols-2">
+          <div class="card">
+            <h3>How to set up compare mode</h3>
+            <ol>
+              <li>Open Overview.</li>
+              <li>Switch to <strong>Compare Mode</strong>.</li>
+              <li>Select Baseline (A), usually the current accepted run.</li>
+              <li>Select Candidate (B), usually the new run.</li>
+              <li>Check the summary metric deltas before going deeper.</li>
+            </ol>
+          </div>
+          <div class="card">
+            <h3>Where compare mode is most useful</h3>
+            <ul>
+              <li><strong>TP Summary:</strong> TP and kinematic metric deltas.</li>
+              <li><strong>Criteria Score:</strong> Practical Pass Rate changes and absolute gate comparison.</li>
+              <li><strong>Detection Stats:</strong> TP/FP distance-bin and status distribution differences.</li>
+              <li><strong>Bounding Box Viewer:</strong> spatial inspection across runs.</li>
+              <li><strong>Prediction Evaluation:</strong> ADE/FDE delta matrices and distance bins.</li>
+            </ul>
+          </div>
+        </div>
+        <div class="callout" style="margin-top:20px">
+          <strong>Sharing:</strong> Overview stores mode and run choices in URL query parameters such as
+          <code>?mode=compare&amp;run_a=old_run&amp;run_b=new_run</code>. Copy that link to let another user open the same comparison.
+        </div>
+      </div>
+    </section>
+
+    <section id="recommended" class="reveal">
+      <div class="shell">
+        <div class="section-head">
+          <div class="kicker">Recommended Review Route</div>
+          <h2>A good investigation has a rhythm.</h2>
+        </div>
+        <table class="table">
+          <thead><tr><th>Step</th><th>Page</th><th>What to look for</th></tr></thead>
+          <tbody>
+            <tr><td>1</td><td>Overview</td><td>High-level TP, error metrics, filters, and immediate A/B signal.</td></tr>
+            <tr><td>2</td><td>Criteria Score</td><td>Practical Pass Rate distribution, failing scenarios, absolute gates, and scenario leaderboard.</td></tr>
+            <tr><td>3</td><td>Detection Stats</td><td>Status distribution, TP/FP by distance, label/scenario concentration, and object-count shifts.</td></tr>
+            <tr><td>4</td><td>Bounding Box Viewer</td><td>Frame-level spatial causes: missed objects, false positives, geometry, visibility, source/status.</td></tr>
+            <tr><td>5</td><td>Prediction Evaluation</td><td>ADE/FDE behavior by label, horizon, distance bin, and polar region.</td></tr>
+            <tr><td>6</td><td>Trend Insights</td><td>Release-level story across full, usecase, and devops jobs.</td></tr>
+          </tbody>
+        </table>
+      </div>
+    </section>
+  </main>
+  <footer class="footer"><div class="shell"><a href="index.html">Back to guide home</a></div></footer>
+  <script src="guide.js"></script>
+</body>
+</html>
diff --git a/evaluation_dashboard_app/docs/guide/guide.js b/evaluation_dashboard_app/docs/guide/guide.js
new file mode 100644
index 0000000..196ab83
--- /dev/null
+++ b/evaluation_dashboard_app/docs/guide/guide.js
@@ -0,0 +1,10 @@
+const observer = new IntersectionObserver((entries) => {
+  entries.forEach((entry) => {
+    if (entry.isIntersecting) {
+      entry.target.classList.add("in");
+      observer.unobserve(entry.target);
+    }
+  });
+}, { threshold: 0.12 });
+
+document.querySelectorAll(".reveal").forEach((el) => observer.observe(el));
diff --git a/evaluation_dashboard_app/docs/guide/index.html b/evaluation_dashboard_app/docs/guide/index.html
new file mode 100644
index 0000000..154bb61
--- /dev/null
+++ b/evaluation_dashboard_app/docs/guide/index.html
@@ -0,0 +1,142 @@
+<!doctype html>
+<html lang="en">
+<head>
+  <meta charset="utf-8" />
+  <meta name="viewport" content="width=device-width, initial-scale=1" />
+  <title>Evaluation Dashboard Guide</title>
+  <link rel="stylesheet" href="styles.css" />
+</head>
+<body>
+  <header class="hero">
+    <div class="shell hero-grid">
+      <div>
+        <div class="eyebrow"><span class="signal"></span>Dashboard Documentation</div>
+        <h1>Evaluation Dashboard<br>Guide</h1>
+        <p class="lead">
+          This guide is the entry point for using, debugging, extending, and deploying the
+          evaluation dashboard. The home page gives the system map; each substantial topic lives in
+          its own focused chapter.
+        </p>
+        <div class="actions">
+          <a class="button primary" href="getting_started.html">Use the Tool</a>
+          <a class="button" href="visual_systems.html">Architecture Diagrams</a>
+          <a class="button" href="deployment.html">Deploy</a>
+        </div>
+      </div>
+      <div class="card hero-console">
+        <h3>Guide Structure</h3>
+        <ul>
+          <li><strong>Home:</strong> system role, ownership map, and chapter routing.</li>
+          <li><strong>Workflow:</strong> Download -> Eval Results -> Overview -> Compare.</li>
+          <li><strong>Page Guide:</strong> page-by-page artifact and state contracts.</li>
+          <li><strong>Data/Reports:</strong> run artifacts, trend data, dashboard PDF, specsheet.</li>
+          <li><strong>Deployment:</strong> local, Docker, production, task queue, multi-user operations.</li>
+          <li><strong>Diagrams:</strong> real sequence/system diagrams for key flows.</li>
+        </ul>
+      </div>
+    </div>
+  </header>
+
+  <nav>
+    <div class="shell nav-inner">
+      <a class="active" href="index.html">Home</a>
+      <a href="getting_started.html">How to Use</a>
+      <a href="pages.html">Pages</a>
+      <a href="data_reports.html">Data & Reports</a>
+      <a href="specsheet.html">Specsheet</a>
+      <a href="deployment.html">Deployment</a>
+      <a href="visual_systems.html">Diagrams</a>
+    </div>
+  </nav>
+
+  <main>
+    <section class="reveal">
+      <div class="shell">
+        <div class="section-head">
+          <div class="kicker">System Role</div>
+          <h2>The dashboard turns evaluator outputs into explorable review evidence.</h2>
+          <p>
+            It reads run folders under the configured data root, generates dashboard artifacts when
+            needed, shares selected run state across Streamlit pages, and provides local or production
+            workflows for comparison, report generation, T4 visualization, and release trend review.
+          </p>
+        </div>
+        <div class="grid cols-4 metric-grid">
+          <div class="card metric"><strong>Overview.py</strong><span>Run selection, compare mode, filters, share links, dashboard PDF, and specsheet entry.</span></div>
+          <div class="card metric"><strong>pages/</strong><span>Numbered Streamlit pages. Filename order is part of the navigation contract.</span></div>
+          <div class="card metric"><strong>lib/</strong><span>Data loading, plotting, reporting, T4 clients, task queue integration, and shared UI utilities.</span></div>
+          <div class="card metric"><strong>deploy/</strong><span>Docker Compose, Nginx, Redis/RQ workers, Postgres, and production scripts.</span></div>
+        </div>
+      </div>
+    </section>
+
+    <section class="reveal alt">
+      <div class="shell">
+        <div class="section-head">
+          <div class="kicker">Chapter Map</div>
+          <h2>Open the chapter that matches your question.</h2>
+        </div>
+        <div class="grid cols-3">
+          <a class="card feature-card" href="getting_started.html" style="text-decoration:none">
+            <h3>How do I use this tool?</h3>
+            <p>Download results, generate Eval Results, select runs in Overview, compare baseline and candidate, then inspect detail pages.</p>
+          </a>
+          <a class="card feature-card analysis" href="pages.html" style="text-decoration:none">
+            <h3>What does each page require?</h3>
+            <p>Inputs, outputs, compare behavior, and empty-state causes for Overview, TP, Criteria, Detection, BEV, T4, TLR, Trend, and debug pages.</p>
+          </a>
+          <a class="card feature-card ops" href="data_reports.html" style="text-decoration:none">
+            <h3>How are data and reports handled?</h3>
+            <p>Summary.csv, Score.csv, parquet, trend metadata, dashboard PDFs, release specsheets, and report troubleshooting.</p>
+          </a>
+          <a class="card feature-card advanced" href="deployment.html" style="text-decoration:none">
+            <h3>How should this be deployed?</h3>
+            <p>Local development, single-container Docker, production compose, Nginx, Redis/RQ, Postgres, env vars, and multi-user operation.</p>
+          </a>
+          <a class="card feature-card spatial" href="visual_systems.html" style="text-decoration:none">
+            <h3>How does the system really flow?</h3>
+            <p>Artifact maps, Download/Eval sequence, Compare state propagation, T4 camera rendering, T4 Three.js overlays, and report generation.</p>
+          </a>
+          <a class="card feature-card advanced" href="specsheet.html" style="text-decoration:none">
+            <h3>How does specsheet work?</h3>
+            <p>Focused deep dive for release specsheet generation, trend context, DevOps/pass-rate plots, and generated PDF sections.</p>
+          </a>
+        </div>
+      </div>
+    </section>
+
+    <section class="reveal">
+      <div class="shell split">
+        <div>
+          <div class="kicker">Primary Flow</div>
+          <h2>The common operational path is still one clear chain.</h2>
+          <p>
+            The detailed instructions live in the Workflow chapter, but the mental model is simple:
+            create or choose a run folder, generate the dashboard artifacts, select run A in Overview,
+            optionally select candidate B, then use the dedicated pages for deeper analysis.
+          </p>
+          <div class="actions">
+            <a class="button primary" href="getting_started.html">Open Workflow Chapter</a>
+            <a class="button" href="visual_systems.html#download-sequence">Open Sequence Diagram</a>
+          </div>
+        </div>
+        <div class="flow compact-flow">
+          <div class="step"><strong>Download</strong><span>Project/Job/Suite results into a run folder.</span></div>
+          <div class="step"><strong>Eval Results</strong><span>Generate Summary, Score, and parquet artifacts.</span></div>
+          <div class="step"><strong>Overview</strong><span>Select run A and synchronize state.</span></div>
+          <div class="step"><strong>Compare</strong><span>Add candidate B when needed.</span></div>
+          <div class="step"><strong>Detail Pages</strong><span>Investigate the specific signal.</span></div>
+        </div>
+      </div>
+    </section>
+  </main>
+
+  <footer class="footer">
+    <div class="shell">
+      <strong>Evaluation Dashboard Guide</strong>
+      <p>This home page routes to the detailed chapters instead of duplicating them.</p>
+    </div>
+  </footer>
+  <script src="guide.js"></script>
+</body>
+</html>
diff --git a/evaluation_dashboard_app/docs/guide/pages.html b/evaluation_dashboard_app/docs/guide/pages.html
new file mode 100644
index 0000000..3f19d17
--- /dev/null
+++ b/evaluation_dashboard_app/docs/guide/pages.html
@@ -0,0 +1,198 @@
+<!doctype html>
+<html lang="en">
+<head>
+  <meta charset="utf-8" />
+  <meta name="viewport" content="width=device-width, initial-scale=1" />
+  <title>Evaluation Dashboard Page Guide</title>
+  <link rel="stylesheet" href="styles.css" />
+</head>
+<body>
+  <header class="hero compact">
+    <div class="shell">
+      <div class="eyebrow"><span class="signal"></span>Page-by-Page Guide</div>
+      <h1>Pages</h1>
+      <p class="lead">
+        A detailed guide to every dashboard page: what it needs, what it shows, how it behaves in compare mode,
+        and when users should open it.
+      </p>
+    </div>
+  </header>
+  <nav>
+    <div class="shell nav-inner">
+      <a href="index.html">Home</a>
+      <a href="getting_started.html">How to Use</a>
+      <a class="active" href="pages.html">Pages</a>
+      <a href="data_reports.html">Data & Reports</a>
+      <a href="specsheet.html">Specsheet</a>
+      <a href="deployment.html">Deployment</a>
+      <a href="visual_systems.html">Diagrams</a>
+    </div>
+  </nav>
+
+  <main>
+    <section class="reveal">
+      <div class="shell">
+        <div class="section-head">
+          <div class="kicker">State Model</div>
+          <h2>Start in Overview, then go deep.</h2>
+          <p class="lead">
+            Overview sets mode, selected runs, and shared filters. Detail pages often read those values from
+            <code>st.session_state</code>, so opening Overview first prevents confusing “please load data” messages.
+          </p>
+        </div>
+        <div class="flow">
+          <div class="step"><strong>Overview</strong><span>Select run A, optional run B, labels, and mode.</span></div>
+          <div class="step"><strong>Shared state</strong><span>The app stores run objects and filters in session state.</span></div>
+          <div class="step"><strong>Detail pages</strong><span>Pages read the active run and specialize the analysis.</span></div>
+          <div class="step"><strong>URL sharing</strong><span>Overview can encode mode and run names into query params.</span></div>
+          <div class="step"><strong>Team review</strong><span>Users open the same linked comparison on the shared server.</span></div>
+        </div>
+      </div>
+    </section>
+
+    <section class="reveal">
+      <div class="shell">
+        <div class="section-head">
+          <div class="kicker">Core Pages</div>
+          <h2>The pages most users touch first.</h2>
+        </div>
+        <div class="grid cols-2">
+          <div class="card feature-card">
+            <h3>Overview</h3>
+            <p><strong>Use when:</strong> starting any review, choosing runs, comparing A/B, exporting dashboard PDFs, or generating release specsheets.</p>
+            <ul>
+              <li><strong>Inputs:</strong> run folders under data root, <code>Summary.csv</code>, labels, compare mode.</li>
+              <li><strong>Shows:</strong> summary metrics, label/product filters, A/B charts, dashboard report export, specsheet export.</li>
+              <li><strong>Watch out:</strong> if a run has no Summary.csv, high-level summary metrics are limited.</li>
+            </ul>
+          </div>
+          <div class="card feature-card">
+            <h3>Download</h3>
+            <p><strong>Use when:</strong> acquiring evaluator results, scenario data, or generating Summary/Score artifacts.</p>
+            <ul>
+              <li><strong>Tabs:</strong> Download Results, Download Scenarios, View Downloads, Eval Results.</li>
+              <li><strong>Outputs:</strong> downloaded archives, result JSON, scenario data, <code>Summary.csv</code>, <code>Score.csv</code>.</li>
+              <li><strong>Score.csv:</strong> contains scenario identity, optional dataset ID, criteria blocks, and Practical Pass Rate.</li>
+              <li><strong>Queue behavior:</strong> with <code>USE_TASK_QUEUE=true</code>, heavy tasks run in workers and appear in Recent Tasks.</li>
+            </ul>
+          </div>
+          <div class="card feature-card">
+            <h3>Evaluator Workflow</h3>
+            <p><strong>Use when:</strong> you want a more guided operational flow for local runs, background tasks, fresh evaluator pipelines, and report reuse.</p>
+            <ul>
+              <li><strong>Good for:</strong> launching longer evaluator workflows without jumping between many manual steps.</li>
+              <li><strong>Outputs:</strong> downloaded artifacts, optional eval_result, <code>Summary.csv</code>, <code>Score.csv</code>, optional parquet, and report assets.</li>
+              <li><strong>Depends on:</strong> evaluator API configuration, task queue for long-running jobs in production.</li>
+            </ul>
+          </div>
+          <div class="card feature-card ops">
+            <h3>Data Management</h3>
+            <p><strong>Use when:</strong> managing a shared server or cleaning up old run outputs.</p>
+            <ul>
+              <li><strong>Shows:</strong> run folders, sizes, modified time, Summary/Score/parquet presence.</li>
+              <li><strong>Actions:</strong> create share links, download ZIP outputs, delete run folders under the data root.</li>
+              <li><strong>Safety:</strong> deletion is restricted to run-level directories under the data root.</li>
+            </ul>
+          </div>
+        </div>
+      </div>
+    </section>
+
+    <section class="reveal">
+      <div class="shell">
+        <div class="section-head">
+          <div class="kicker">Metric Analysis Pages</div>
+          <h2>Turn CSV and parquet artifacts into review signals.</h2>
+        </div>
+        <table class="table">
+          <thead><tr><th>Page</th><th>Prerequisite</th><th>Main Use</th><th>Compare Behavior</th></tr></thead>
+          <tbody>
+            <tr>
+              <td><strong>TP Summary</strong></td>
+              <td><code>Summary.csv</code></td>
+              <td>TP rate, RMS/STD, velocity scatter, metric distribution, density, scenario delta ranking.</td>
+              <td>Shows candidate-vs-baseline deltas such as Delta TP and metric shifts.</td>
+            </tr>
+            <tr>
+              <td><strong>Criteria Based Score</strong></td>
+              <td><code>Score.csv</code></td>
+              <td>Criteria block selection, Practical Pass Rate distribution, group means, box plots, absolute gates.</td>
+              <td>Compares Practical Pass Rate changes, gate pass/fail status, and per-scenario deltas. Uses Scenario + Dataset when Dataset exists.</td>
+            </tr>
+            <tr>
+              <td><strong>Detection Stats</strong></td>
+              <td>Parquet files</td>
+              <td>TP/FP/FN rates, distance bins, status distribution, object counts, label and scenario breakdown.</td>
+              <td>Side-by-side and delta-oriented detection metrics across selected runs.</td>
+            </tr>
+            <tr>
+              <td><strong>Prediction Evaluation</strong></td>
+              <td>Prediction parquet/artifacts</td>
+              <td>Specsheet-aligned ADE/FDE, label matrices, distance bins, polar/radial breakdowns.</td>
+              <td>ADE/FDE delta matrix and per-distance comparisons between A and B.</td>
+            </tr>
+            <tr>
+              <td><strong>Trend Insights</strong></td>
+              <td>Trend <code>metadata.yaml</code> + <code>summary.json</code></td>
+              <td>Release inventory, mAP trend, prediction trend, pass-rate trend, defect evaluation, metric atlas.</td>
+              <td>Does not compare A/B the way the other pages do; instead it groups release history across versions.</td>
+            </tr>
+          </tbody>
+        </table>
+      </div>
+    </section>
+
+    <section class="reveal">
+      <div class="shell">
+        <div class="section-head">
+          <div class="kicker">Spatial and Visual Pages</div>
+          <h2>Use these when numbers are not enough.</h2>
+        </div>
+        <div class="grid cols-3">
+          <div class="card feature-card spatial">
+            <h3>Bounding Box Viewer</h3>
+            <p>BEV inspection from parquet data. Filter by t4dataset, topic, label, visibility, source, status, frame, and run. Best for understanding where misses and false positives happen spatially.</p>
+          </div>
+          <div class="card feature-card spatial">
+            <h3>T4 3D Viewer</h3>
+            <p>3D-oriented visual inspection and T4 visualizer integration. Best when BEV alone is not enough and users need camera or rendered context.</p>
+          </div>
+          <div class="card feature-card spatial">
+            <h3>T4 Dataset Server</h3>
+            <p>Integration helper for liveness checks, render requests, target object JSON, and camera PNG embed workflows. More operational than analysis-focused.</p>
+          </div>
+        </div>
+      </div>
+    </section>
+
+    <section class="reveal">
+      <div class="shell">
+        <div class="section-head">
+          <div class="kicker">Specialized Pages</div>
+          <h2>Tools for narrower investigations and operations.</h2>
+        </div>
+        <div class="grid cols-2">
+          <div class="card feature-card analysis">
+            <h3>TLR Analysis</h3>
+            <p>Traffic Light Recognition evaluation. Use after downloading scenario data from Download Scenarios. It visualizes criteria matrices, vehicle status vs signal type, important zones, and compare-mode deltas.</p>
+          </div>
+          <div class="card feature-card advanced">
+            <h3>Parquet Debug</h3>
+            <p>Developer troubleshooting page for parquet, pkl, and result JSON. Use it when a page fails to parse data, schemas look suspicious, or criteria state needs low-level inspection.</p>
+          </div>
+          <div class="card feature-card">
+            <h3>Help</h3>
+            <p>In-app README viewer with Japanese/English switching. Useful when users are inside Streamlit and need setup or workflow reminders without leaving the app.</p>
+          </div>
+          <div class="card feature-card ops">
+            <h3>Deployment Debug</h3>
+            <p>Docker-only operations page. Checks environment, Postgres, Redis, RQ, task rows, container status, logs, and optional restricted exec. Keep access controlled.</p>
+          </div>
+        </div>
+      </div>
+    </section>
+  </main>
+  <footer class="footer"><div class="shell"><a href="index.html">Back to guide home</a></div></footer>
+  <script src="guide.js"></script>
+</body>
+</html>
diff --git a/evaluation_dashboard_app/docs/guide/specsheet.html b/evaluation_dashboard_app/docs/guide/specsheet.html
new file mode 100644
index 0000000..3f3fafa
--- /dev/null
+++ b/evaluation_dashboard_app/docs/guide/specsheet.html
@@ -0,0 +1,274 @@
+<!doctype html>
+<html lang="en">
+<head>
+  <meta charset="utf-8" />
+  <meta name="viewport" content="width=device-width, initial-scale=1" />
+  <title>Specsheet Details</title>
+  <link rel="stylesheet" href="styles.css" />
+</head>
+<body>
+  <header class="hero compact">
+    <div class="shell">
+      <div class="eyebrow"><span class="signal"></span>Specsheet</div>
+      <h1>Specsheet Details</h1>
+      <p class="lead">
+        The specsheet export creates a release-oriented PDF from the selected run, optional trend metadata,
+        and the external <code>perception_catalog_analyzer</code> library.
+      </p>
+    </div>
+  </header>
+
+  <nav>
+    <div class="shell nav-inner">
+      <a href="index.html">Home</a>
+      <a href="getting_started.html">How to Use</a>
+      <a href="pages.html">Pages</a>
+      <a href="data_reports.html">Data & Reports</a>
+      <a class="active" href="specsheet.html">Specsheet</a>
+      <a href="deployment.html">Deployment</a>
+      <a href="visual_systems.html">Diagrams</a>
+    </div>
+  </nav>
+
+  <main>
+    <section class="reveal">
+      <div class="shell">
+        <div class="section-head">
+          <div class="kicker">Overview</div>
+          <h2>What the specsheet export does.</h2>
+          <p class="lead">
+            The dashboard handles the UI, selected run path, metadata, progress, and local artifact setup.
+            The analyzer library handles the metric blocks, template rendering, plots, and PDF output.
+          </p>
+        </div>
+        <div class="flow">
+          <div class="step">
+            <strong>1. Select a run</strong>
+            <span><code>Overview.py</code> gathers project, version, topic, labels, and optional trend metadata.</span>
+          </div>
+          <div class="step">
+            <strong>2. Prepare files</strong>
+            <span><code>ensure_specsheet_csvs()</code> creates <code>current.csv</code> and <code>future.csv</code> when needed.</span>
+          </div>
+          <div class="step">
+            <strong>3. Build blocks</strong>
+            <span><code>SceneDataFrame.from_dir()</code> and <code>get_blocks()</code> produce abstract and detailed sections.</span>
+          </div>
+          <div class="step">
+            <strong>4. Add trend context</strong>
+            <span><code>metadata.yaml</code> and <code>summary.json</code> files are classified and converted into trend rows and plots.</span>
+          </div>
+          <div class="step">
+            <strong>5. Render PDF</strong>
+            <span><code>update_template()</code> creates HTML, then <code>specsheet()</code> writes <code>specsheet.pdf</code>.</span>
+          </div>
+        </div>
+      </div>
+    </section>
+
+    <section class="reveal">
+      <div class="shell">
+        <div class="section-head">
+          <div class="kicker">App and Library</div>
+          <h2>The integration boundary is mostly in <code>lib/specsheet_report.py</code>.</h2>
+        </div>
+        <table class="table">
+          <thead><tr><th>Layer</th><th>Key file/module</th><th>Responsibility</th></tr></thead>
+          <tbody>
+            <tr>
+              <td>Streamlit UI</td>
+              <td><code>Overview.py</code></td>
+              <td>Collects project identity, version, topic, labels, selected run, trend toggle, and user-facing progress.</td>
+            </tr>
+            <tr>
+              <td>Dashboard wrapper</td>
+              <td><code>lib/specsheet_report.py</code></td>
+              <td>Defines artifact paths, adapts analyzer signatures, discovers trend files, classifies summaries, and prepares plot paths.</td>
+            </tr>
+            <tr>
+              <td>Analyzer library</td>
+              <td><code>perception_catalog_analyzer</code></td>
+              <td>Loads scene data, generates specsheet metric blocks, renders template HTML, creates plots, and writes the final PDF.</td>
+            </tr>
+            <tr>
+              <td>Local artifacts</td>
+              <td><code>data/&lt;run&gt;/...</code></td>
+              <td>Stores run CSV/parquet files, trend metadata, summary files, generated PNGs, HTML, and <code>specsheet.pdf</code>.</td>
+            </tr>
+          </tbody>
+        </table>
+      </div>
+    </section>
+
+    <section class="reveal">
+      <div class="shell">
+        <div class="section-head">
+          <div class="kicker">Files</div>
+          <h2>Specsheet input and output files live inside the run folder.</h2>
+          <p class="lead">
+            Trend files can come from a standalone dashboard run or from a grouped release folder. In both cases,
+            each trend item needs a <code>metadata.yaml</code> file next to its <code>summary.json</code>.
+          </p>
+        </div>
+        <div class="grid cols-2">
+          <div class="card">
+            <h3>Standalone run shape</h3>
+            <div class="tree" aria-label="Standalone run file tree">
+              <div>data/my_run/</div>
+              <div>  current.csv</div>
+              <div>  future.csv</div>
+              <div>  resources/</div>
+              <div>    metadata.yaml</div>
+              <div>    summary.json</div>
+              <div>  specsheet/</div>
+              <div>    specsheet.html</div>
+              <div>    specsheet.pdf</div>
+            </div>
+          </div>
+          <div class="card">
+            <h3>Grouped release shape</h3>
+            <div class="tree" aria-label="Grouped release file tree">
+              <div>data/trend_release_full_usecase_devops/</div>
+              <div>  perception.object_recognition.objects/</div>
+              <div>    &lt;full_job_id&gt;/metadata.yaml + summary.json</div>
+              <div>    &lt;usecase_job_id&gt;/metadata.yaml + summary.json</div>
+              <div>    &lt;devops_job_id&gt;/metadata.yaml + summary.json</div>
+              <div>  specsheet/</div>
+              <div>    map_trend.png</div>
+              <div>    devops_trend.png</div>
+              <div>    specsheet.pdf</div>
+            </div>
+          </div>
+        </div>
+        <div class="callout">
+          <strong>Note:</strong> <code>discover_trend_metadata_files()</code> scans the data root for
+          metadata/summary pairs, and <code>discover_trend_release_groups()</code> decides how those files
+          should be grouped for the release PDF.
+        </div>
+      </div>
+    </section>
+
+    <section class="reveal">
+      <div class="shell">
+        <div class="section-head">
+          <div class="kicker">Trend Data</div>
+          <h2>Trend summaries are classified by JSON shape.</h2>
+          <p class="lead">
+            Metadata provides release identity. The summary payload decides whether the item is a full,
+            usecase, devops, or unknown trend source.
+          </p>
+        </div>
+        <table class="table">
+          <thead><tr><th>Role</th><th>How it is recognized</th><th>Specsheet use</th></tr></thead>
+          <tbody>
+            <tr>
+              <td>Full performance</td>
+              <td><code>summary.json</code> has <code>blocks</code> containing <code>全数データセット評価</code>.</td>
+              <td>Feeds mAP, precision, recall, error, and prediction trend sections.</td>
+            </tr>
+            <tr>
+              <td>Usecase</td>
+              <td><code>summary.json</code> has <code>blocks</code> containing <code>ユースケース評価</code>.</td>
+              <td>Participates in release grouping and inventory context.</td>
+            </tr>
+            <tr>
+              <td>DevOps pass-rate</td>
+              <td>Summary is a nested dictionary without <code>blocks</code>, with category results containing <code>passed</code> and <code>total</code>.</td>
+              <td>Feeds overall pass-rate trend and pass-rate detail plots.</td>
+            </tr>
+          </tbody>
+        </table>
+      </div>
+    </section>
+
+    <section class="reveal">
+      <div class="shell">
+        <div class="section-head">
+          <div class="kicker">PDF Assembly</div>
+          <h2>The final PDF is assembled from analyzer HTML plus dashboard trend context.</h2>
+        </div>
+        <div class="grid cols-2">
+          <div class="card">
+            <h3><code>get_blocks()</code></h3>
+            <p>
+              Creates abstract and detailed metric fragments for labels, metrics, and evaluation type.
+              These fragments become the main technical body of the PDF.
+            </p>
+          </div>
+          <div class="card">
+            <h3><code>update_template()</code></h3>
+            <p>
+              Receives project/version metadata and trend context, then renders the analyzer template body.
+              Generated PNG paths are included when trend plots exist.
+            </p>
+          </div>
+          <div class="card">
+            <h3><code>specsheet()</code></h3>
+            <p>
+              Combines body HTML, abstract HTML, and detailed HTML, then writes
+              <code>specsheet/specsheet.html</code> and <code>specsheet/specsheet.pdf</code>.
+            </p>
+          </div>
+          <div class="card">
+            <h3>Trend plots</h3>
+            <p>
+              Full performance trends can generate <code>map_trend.png</code> and
+              <code>prediction_trend.png</code>. DevOps summaries can generate
+              <code>devops_trend.png</code> and <code>devops_trend_detail.png</code>.
+            </p>
+          </div>
+        </div>
+      </div>
+    </section>
+
+    <section class="reveal">
+      <div class="shell">
+        <div class="section-head">
+          <div class="kicker">Debugging</div>
+          <h2>When a specsheet section is missing, check the data contract.</h2>
+        </div>
+        <table class="table">
+          <thead><tr><th>Symptom</th><th>What to check</th><th>Expected condition</th></tr></thead>
+          <tbody>
+            <tr>
+              <td>No trend section</td>
+              <td>Trend metadata discovery</td>
+              <td>At least one <code>metadata.yaml</code> has a sibling <code>summary.json</code>.</td>
+            </tr>
+            <tr>
+              <td>Full trend is missing</td>
+              <td>Full summary role</td>
+              <td><code>summary.json</code> has full-performance blocks and generated full trend rows.</td>
+            </tr>
+            <tr>
+              <td>Pass Rate Trend is missing</td>
+              <td>DevOps summary and plot files</td>
+              <td>DevOps rows are non-empty and <code>devops_trend.png</code> exists in the specsheet output folder.</td>
+            </tr>
+            <tr>
+              <td>PDF says no data</td>
+              <td>Template context</td>
+              <td>The relevant trend list is non-empty before calling <code>update_template()</code>.</td>
+            </tr>
+          </tbody>
+        </table>
+        <div class="code">
+          <div class="code-title">quick local verification</div>
+          <pre>PYTHONPATH=. python - &lt;&lt;'PY'
+from pathlib import Path
+from lib.specsheet_report import _build_trend_context, discover_trend_metadata_files
+
+ctx = _build_trend_context(discover_trend_metadata_files(), Path("/tmp/specsheet-trend-check"))
+print(len(ctx["performance_trend_data"]), len(ctx["devops_trend_data"]))
+PY</pre>
+        </div>
+      </div>
+    </section>
+  </main>
+
+  <footer class="footer">
+    <div class="shell"><a href="data_reports.html">Back to Data & Reports</a></div>
+  </footer>
+  <script src="guide.js"></script>
+</body>
+</html>
diff --git a/evaluation_dashboard_app/docs/guide/styles.css b/evaluation_dashboard_app/docs/guide/styles.css
new file mode 100644
index 0000000..3459c9f
--- /dev/null
+++ b/evaluation_dashboard_app/docs/guide/styles.css
@@ -0,0 +1,848 @@
+:root {
+  --bg: #f6f8fb;
+  --paper: #ffffff;
+  --ink: #101827;
+  --muted: #5d697d;
+  --line: #dce4f0;
+  --blue: #2563eb;
+  --teal: #0f766e;
+  --cyan: #0891b2;
+  --gold: #b7791f;
+  --red: #be123c;
+  --violet: #6d28d9;
+  --dark: #111827;
+  --shadow: 0 20px 60px rgba(17, 24, 39, .12);
+  --soft-shadow: 0 12px 30px rgba(17, 24, 39, .07);
+  --radius: 8px;
+}
+
+* { box-sizing: border-box; }
+html { scroll-behavior: smooth; }
+body {
+  margin: 0;
+  color: var(--ink);
+  background:
+    radial-gradient(circle at 14% 8%, rgba(37, 99, 235, .12), transparent 26rem),
+    radial-gradient(circle at 86% 16%, rgba(15, 118, 110, .12), transparent 24rem),
+    linear-gradient(180deg, #f8fbff 0%, #ffffff 34%, #f6f8fb 100%);
+  font-family: Inter, ui-sans-serif, system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif;
+  line-height: 1.58;
+}
+
+body::before {
+  content: "";
+  position: fixed;
+  inset: 0;
+  pointer-events: none;
+  z-index: -1;
+  background-image:
+    linear-gradient(rgba(17, 24, 39, .042) 1px, transparent 1px),
+    linear-gradient(90deg, rgba(17, 24, 39, .042) 1px, transparent 1px);
+  background-size: 44px 44px;
+  mask-image: linear-gradient(180deg, rgba(0,0,0,.7), transparent 70%);
+}
+
+a { color: inherit; }
+code, pre { font-family: "SFMono-Regular", Consolas, "Liberation Mono", monospace; }
+[hidden] { display: none !important; }
+
+.shell {
+  width: min(1180px, calc(100% - 36px));
+  margin: 0 auto;
+}
+
+.hero {
+  min-height: 72vh;
+  display: grid;
+  align-items: center;
+  padding: 56px 0 36px;
+}
+
+.hero.compact {
+  min-height: 46vh;
+}
+
+.hero-grid {
+  display: grid;
+  grid-template-columns: minmax(0, 1fr) minmax(330px, 440px);
+  gap: 46px;
+  align-items: center;
+}
+
+.eyebrow {
+  display: inline-flex;
+  align-items: center;
+  gap: 10px;
+  color: var(--teal);
+  font-size: .78rem;
+  font-weight: 900;
+  letter-spacing: .14em;
+  text-transform: uppercase;
+}
+
+.signal {
+  width: 11px;
+  height: 11px;
+  border-radius: 99px;
+  background: var(--teal);
+  animation: ping 1.9s infinite;
+}
+
+h1 {
+  margin: 16px 0 18px;
+  font-size: clamp(3.2rem, 7.6vw, 7.8rem);
+  line-height: .88;
+  letter-spacing: 0;
+  max-width: 980px;
+}
+
+h2 {
+  margin: 0 0 16px;
+  font-size: clamp(2rem, 4vw, 4.2rem);
+  line-height: 1;
+  letter-spacing: 0;
+}
+
+h3 {
+  margin: 0 0 10px;
+  font-size: 1.12rem;
+  line-height: 1.24;
+}
+
+p { margin: 0; }
+
+.lead {
+  max-width: 860px;
+  color: var(--muted);
+  font-size: 1.15rem;
+}
+
+.actions {
+  display: flex;
+  gap: 12px;
+  flex-wrap: wrap;
+  margin-top: 28px;
+}
+
+.button {
+  display: inline-flex;
+  align-items: center;
+  gap: 10px;
+  min-height: 44px;
+  padding: 11px 15px;
+  border-radius: var(--radius);
+  border: 1px solid var(--line);
+  background: var(--paper);
+  color: var(--ink);
+  text-decoration: none;
+  font-weight: 820;
+  box-shadow: 0 8px 18px rgba(17, 24, 39, .07);
+}
+
+.button.primary {
+  color: white;
+  background: var(--dark);
+  border-color: var(--dark);
+}
+
+.button:hover { transform: translateY(-1px); }
+
+.language-console {
+  display: inline-flex;
+  align-items: center;
+  gap: 8px;
+  flex-wrap: wrap;
+  margin-top: 24px;
+  padding: 8px;
+  border: 1px solid var(--line);
+  border-radius: var(--radius);
+  background: rgba(255,255,255,.78);
+  box-shadow: var(--soft-shadow);
+  color: var(--muted);
+  font-weight: 850;
+  font-size: .9rem;
+}
+
+.lang-button {
+  min-height: 34px;
+  border: 1px solid var(--line);
+  border-radius: 7px;
+  padding: 7px 11px;
+  background: white;
+  color: #334155;
+  font: inherit;
+  font-weight: 900;
+  cursor: pointer;
+}
+
+.lang-button.active {
+  color: white;
+  background: var(--dark);
+  border-color: var(--dark);
+}
+
+.hero-console {
+  border-top: 4px solid var(--teal);
+}
+
+.metric-grid {
+  margin-top: 26px;
+}
+
+.metric {
+  min-height: 140px;
+  display: flex;
+  flex-direction: column;
+  gap: 10px;
+}
+
+.metric strong {
+  font-size: 1.05rem;
+}
+
+.metric span {
+  color: var(--muted);
+}
+
+nav {
+  position: sticky;
+  top: 0;
+  z-index: 30;
+  background: rgba(248, 251, 255, .88);
+  border-block: 1px solid rgba(220,228,240,.9);
+  backdrop-filter: blur(14px);
+}
+
+.nav-inner {
+  display: flex;
+  gap: 8px;
+  align-items: center;
+  padding: 12px 0;
+  overflow-x: auto;
+}
+
+.nav-inner a {
+  text-decoration: none;
+  white-space: nowrap;
+  color: #334155;
+  font-size: .88rem;
+  font-weight: 820;
+  padding: 8px 10px;
+  border-radius: 7px;
+}
+
+.nav-inner a:hover, .nav-inner a.active {
+  background: white;
+  color: var(--blue);
+}
+
+section {
+  padding: 78px 0;
+  position: relative;
+}
+
+.section-head {
+  max-width: 900px;
+  margin-bottom: 32px;
+}
+
+.kicker {
+  color: var(--blue);
+  font-size: .78rem;
+  font-weight: 950;
+  letter-spacing: .14em;
+  text-transform: uppercase;
+  margin-bottom: 12px;
+}
+
+.grid { display: grid; gap: 18px; }
+.cols-2 { grid-template-columns: repeat(2, minmax(0, 1fr)); }
+.cols-3 { grid-template-columns: repeat(3, minmax(0, 1fr)); }
+.cols-4 { grid-template-columns: repeat(4, minmax(0, 1fr)); }
+
+.card {
+  background: rgba(255,255,255,.92);
+  border: 1px solid var(--line);
+  border-radius: var(--radius);
+  padding: 20px;
+  box-shadow: var(--soft-shadow);
+}
+
+.card p, .card li { color: var(--muted); }
+.card ul { margin: 12px 0 0; padding-left: 18px; }
+
+.number {
+  width: 34px;
+  height: 34px;
+  display: inline-grid;
+  place-items: center;
+  border-radius: 8px;
+  background: var(--blue);
+  color: white;
+  font-weight: 950;
+  margin-bottom: 13px;
+}
+
+.flow {
+  display: grid;
+  grid-template-columns: repeat(5, minmax(132px, 1fr));
+  gap: 12px;
+  align-items: stretch;
+  margin-top: 24px;
+}
+
+.compact-flow {
+  grid-template-columns: 1fr;
+  margin-top: 0;
+}
+
+.compact-flow .step {
+  min-height: auto;
+}
+
+.compact-flow .step::after {
+  display: none;
+}
+
+.step {
+  min-height: 155px;
+  padding: 16px;
+  border: 1px solid var(--line);
+  border-radius: var(--radius);
+  background: white;
+  position: relative;
+  overflow: hidden;
+  animation: lift .6s ease both;
+}
+
+.step:nth-child(2) { animation-delay: .08s; }
+.step:nth-child(3) { animation-delay: .16s; }
+.step:nth-child(4) { animation-delay: .24s; }
+.step:nth-child(5) { animation-delay: .32s; }
+
+.step::after {
+  content: "";
+  position: absolute;
+  top: 50%;
+  right: -23px;
+  width: 42px;
+  height: 2px;
+  background: linear-gradient(90deg, var(--blue), transparent);
+}
+
+.step:last-child::after { display: none; }
+.step strong { display: block; margin-bottom: 8px; }
+.step span { color: var(--muted); font-size: .9rem; }
+
+.split {
+  display: grid;
+  grid-template-columns: minmax(0, 1.05fr) minmax(0, .95fr);
+  gap: 20px;
+  align-items: start;
+}
+
+.media {
+  overflow: hidden;
+  border-radius: var(--radius);
+  border: 1px solid var(--line);
+  background: white;
+  box-shadow: var(--soft-shadow);
+}
+
+.media img {
+  width: 100%;
+  display: block;
+  object-fit: cover;
+}
+
+.media-grid {
+  display: grid;
+  grid-template-columns: repeat(3, minmax(0, 1fr));
+  gap: 18px;
+  margin-top: 28px;
+}
+
+.caption {
+  color: var(--muted);
+  font-size: .86rem;
+  padding: 12px 14px;
+  border-top: 1px solid var(--line);
+  background: #fbfdff;
+}
+
+.code {
+  background: #101827;
+  color: #dbeafe;
+  border-radius: var(--radius);
+  border: 1px solid rgba(255,255,255,.08);
+  overflow: hidden;
+  box-shadow: 0 16px 44px rgba(15, 23, 42, .18);
+}
+
+.code-title {
+  display: flex;
+  justify-content: space-between;
+  gap: 12px;
+  padding: 12px 15px;
+  border-bottom: 1px solid rgba(255,255,255,.1);
+  color: #bfdbfe;
+  font-weight: 850;
+  font-size: .88rem;
+}
+
+pre {
+  margin: 0;
+  padding: 17px;
+  overflow: auto;
+  font-size: .82rem;
+  line-height: 1.55;
+}
+
+.tree {
+  background: white;
+  border: 1px solid var(--line);
+  border-radius: var(--radius);
+  padding: 18px;
+  font-family: "SFMono-Regular", Consolas, monospace;
+  overflow-x: auto;
+  color: #334155;
+  box-shadow: var(--soft-shadow);
+}
+
+.tree div {
+  white-space: nowrap;
+  opacity: 0;
+  transform: translateX(-10px);
+  animation: treeIn .45s ease forwards;
+}
+
+.tree div:nth-child(2) { animation-delay: .04s; }
+.tree div:nth-child(3) { animation-delay: .08s; }
+.tree div:nth-child(4) { animation-delay: .12s; }
+.tree div:nth-child(5) { animation-delay: .16s; }
+.tree div:nth-child(6) { animation-delay: .20s; }
+.tree div:nth-child(7) { animation-delay: .24s; }
+.tree div:nth-child(8) { animation-delay: .28s; }
+.tree div:nth-child(9) { animation-delay: .32s; }
+.tree div:nth-child(10) { animation-delay: .36s; }
+
+.pill {
+  display: inline-flex;
+  align-items: center;
+  gap: 8px;
+  min-height: 28px;
+  padding: 5px 9px;
+  border: 1px solid var(--line);
+  border-radius: 999px;
+  background: #fbfdff;
+  color: #334155;
+  font-size: .8rem;
+  font-weight: 780;
+  margin: 3px 4px 3px 0;
+}
+
+.callout {
+  border-left: 5px solid var(--teal);
+  background: #ecfdf5;
+  color: #123f38;
+  border-radius: var(--radius);
+  padding: 18px;
+}
+
+.callout.warn {
+  border-left-color: var(--gold);
+  background: #fff8e7;
+  color: #513a13;
+}
+
+.sequence {
+  display: grid;
+  grid-template-columns: repeat(var(--cols, 5), minmax(110px, 1fr));
+  gap: 10px;
+  margin: 22px 0;
+  position: relative;
+}
+
+.actor {
+  min-height: 78px;
+  border: 1px solid var(--line);
+  border-radius: var(--radius);
+  background: white;
+  display: grid;
+  place-items: center;
+  text-align: center;
+  padding: 12px;
+  box-shadow: var(--soft-shadow);
+  font-weight: 900;
+}
+
+.actor small {
+  display: block;
+  color: var(--muted);
+  font-weight: 750;
+  margin-top: 4px;
+}
+
+.message {
+  grid-column: 1 / -1;
+  display: grid;
+  grid-template-columns: subgrid;
+  min-height: 44px;
+  align-items: center;
+}
+
+.arrow {
+  height: 28px;
+  border-top: 2px solid var(--blue);
+  position: relative;
+  display: flex;
+  align-items: flex-start;
+  justify-content: center;
+  color: var(--muted);
+  font-size: .78rem;
+  font-weight: 800;
+  padding-top: 6px;
+}
+
+.arrow::after {
+  content: "";
+  position: absolute;
+  right: -2px;
+  top: -6px;
+  border-left: 9px solid var(--blue);
+  border-top: 5px solid transparent;
+  border-bottom: 5px solid transparent;
+}
+
+.arrow.back {
+  border-color: var(--teal);
+}
+
+.arrow.back::after {
+  right: auto;
+  left: -2px;
+  border-left: 0;
+  border-right: 9px solid var(--teal);
+}
+
+.span-1-2 { grid-column: 1 / 3; }
+.span-2-3 { grid-column: 2 / 4; }
+.span-3-4 { grid-column: 3 / 5; }
+.span-4-5 { grid-column: 4 / 6; }
+.span-1-3 { grid-column: 1 / 4; }
+.span-2-4 { grid-column: 2 / 5; }
+.span-3-5 { grid-column: 3 / 6; }
+.span-2-5 { grid-column: 2 / 6; }
+
+.system-map {
+  display: grid;
+  grid-template-columns: repeat(4, minmax(0, 1fr));
+  gap: 14px;
+  align-items: stretch;
+  margin-top: 22px;
+}
+
+.system-node {
+  background: white;
+  border: 1px solid var(--line);
+  border-radius: var(--radius);
+  padding: 16px;
+  min-height: 132px;
+  box-shadow: var(--soft-shadow);
+  position: relative;
+  overflow: hidden;
+}
+
+.system-node::before {
+  content: "";
+  position: absolute;
+  inset: 0 auto 0 0;
+  width: 5px;
+  background: var(--blue);
+}
+
+.system-node.teal::before { background: var(--teal); }
+.system-node.gold::before { background: var(--gold); }
+.system-node.cyan::before { background: var(--cyan); }
+.system-node.violet::before { background: var(--violet); }
+.system-node.red::before { background: var(--red); }
+
+.system-node p {
+  color: var(--muted);
+}
+
+.mini-diagram {
+  border: 1px solid var(--line);
+  border-radius: var(--radius);
+  background: white;
+  padding: 18px;
+  box-shadow: var(--soft-shadow);
+  overflow-x: auto;
+}
+
+.real-diagram {
+  background: white;
+  border: 1px solid var(--line);
+  border-radius: var(--radius);
+  box-shadow: var(--soft-shadow);
+  overflow: auto;
+  margin: 22px 0;
+}
+
+.real-diagram svg {
+  display: block;
+  min-width: 980px;
+  width: 100%;
+  height: auto;
+}
+
+.svg-title {
+  font: 800 18px Inter, ui-sans-serif, system-ui, sans-serif;
+  fill: #101827;
+}
+
+.svg-actor {
+  fill: #ffffff;
+  stroke: #cbd5e1;
+  stroke-width: 1.2;
+}
+
+.svg-actor-text {
+  font: 800 13px Inter, ui-sans-serif, system-ui, sans-serif;
+  fill: #101827;
+}
+
+.svg-small {
+  font: 700 11px Inter, ui-sans-serif, system-ui, sans-serif;
+  fill: #64748b;
+}
+
+.svg-line {
+  stroke: #cbd5e1;
+  stroke-width: 1.2;
+  stroke-dasharray: 5 6;
+}
+
+.svg-msg {
+  stroke: #2563eb;
+  stroke-width: 2;
+  fill: none;
+  marker-end: url(#arrow-blue);
+}
+
+.svg-msg-return {
+  stroke: #0f766e;
+  stroke-width: 2;
+  fill: none;
+  stroke-dasharray: 7 5;
+  marker-end: url(#arrow-teal);
+}
+
+.svg-note {
+  fill: #f8fafc;
+  stroke: #dbe5f2;
+}
+
+.svg-note-warn {
+  fill: #fff8e7;
+  stroke: #f2d38b;
+}
+
+.svg-note-text {
+  font: 700 12px Inter, ui-sans-serif, system-ui, sans-serif;
+  fill: #334155;
+}
+
+.svg-step {
+  font: 800 12px Inter, ui-sans-serif, system-ui, sans-serif;
+  fill: #1e3a8a;
+}
+
+.payload-grid {
+  display: grid;
+  grid-template-columns: repeat(3, minmax(0, 1fr));
+  gap: 14px;
+  margin-top: 18px;
+}
+
+.payload-card {
+  background: #101827;
+  color: #dbeafe;
+  border-radius: var(--radius);
+  border: 1px solid rgba(255,255,255,.08);
+  overflow: hidden;
+}
+
+.payload-card h3 {
+  padding: 12px 14px;
+  border-bottom: 1px solid rgba(255,255,255,.1);
+  color: #bfdbfe;
+  font-size: .92rem;
+}
+
+.payload-card pre {
+  font-size: .76rem;
+}
+
+.legend-row {
+  display: flex;
+  gap: 10px;
+  flex-wrap: wrap;
+  margin-top: 12px;
+}
+
+.legend-item {
+  display: inline-flex;
+  align-items: center;
+  gap: 8px;
+  color: var(--muted);
+  font-weight: 760;
+  font-size: .86rem;
+}
+
+.legend-swatch {
+  width: 22px;
+  height: 4px;
+  border-radius: 999px;
+  background: var(--blue);
+}
+
+.legend-swatch.return {
+  background: repeating-linear-gradient(90deg, var(--teal) 0 7px, transparent 7px 12px);
+  border: 1px solid rgba(15,118,110,.25);
+}
+
+.swimlanes {
+  display: grid;
+  gap: 12px;
+}
+
+.swimlane {
+  display: grid;
+  grid-template-columns: 180px minmax(0, 1fr);
+  gap: 12px;
+  align-items: stretch;
+}
+
+.swimlane-label {
+  border-radius: var(--radius);
+  background: var(--dark);
+  color: white;
+  padding: 14px;
+  display: grid;
+  align-items: center;
+  font-weight: 900;
+}
+
+.swimlane-flow {
+  display: flex;
+  gap: 10px;
+  flex-wrap: wrap;
+  align-items: center;
+  border: 1px solid var(--line);
+  background: #fbfdff;
+  border-radius: var(--radius);
+  padding: 12px;
+}
+
+.chip {
+  border: 1px solid var(--line);
+  border-radius: 8px;
+  background: white;
+  padding: 9px 10px;
+  font-size: .84rem;
+  font-weight: 820;
+  color: #334155;
+}
+
+.table,
+.table-wrap table {
+  width: 100%;
+  border-collapse: collapse;
+  overflow: hidden;
+  border-radius: var(--radius);
+  background: white;
+  border: 1px solid var(--line);
+  box-shadow: var(--soft-shadow);
+}
+
+.table-wrap {
+  overflow-x: auto;
+  border-radius: var(--radius);
+}
+
+.table th, .table td,
+.table-wrap th, .table-wrap td {
+  text-align: left;
+  padding: 12px 14px;
+  border-bottom: 1px solid var(--line);
+  vertical-align: top;
+}
+
+.table th,
+.table-wrap th {
+  background: #f8fafc;
+  font-size: .82rem;
+  text-transform: uppercase;
+  letter-spacing: .08em;
+}
+
+.table td,
+.table-wrap td { color: var(--muted); }
+
+.feature-card {
+  border-top: 5px solid var(--blue);
+}
+.feature-card.analysis { border-top-color: var(--teal); }
+.feature-card.spatial { border-top-color: var(--cyan); }
+.feature-card.ops { border-top-color: var(--gold); }
+.feature-card.advanced { border-top-color: var(--violet); }
+
+.footer {
+  padding: 48px 0 70px;
+  border-top: 1px solid var(--line);
+  color: var(--muted);
+}
+
+.reveal {
+  opacity: 0;
+  transform: translateY(18px);
+  transition: opacity .6s ease, transform .6s ease;
+}
+
+.reveal.in {
+  opacity: 1;
+  transform: translateY(0);
+}
+
+@keyframes ping {
+  0% { box-shadow: 0 0 0 0 rgba(15, 118, 110, .45); }
+  72% { box-shadow: 0 0 0 13px rgba(15, 118, 110, 0); }
+  100% { box-shadow: 0 0 0 0 rgba(15, 118, 110, 0); }
+}
+
+@keyframes lift {
+  from { opacity: 0; transform: translateY(18px); }
+  to { opacity: 1; transform: translateY(0); }
+}
+
+@keyframes treeIn {
+  to { opacity: 1; transform: translateX(0); }
+}
+
+@media (max-width: 980px) {
+  .hero-grid, .split, .cols-2, .cols-3, .cols-4, .media-grid {
+    grid-template-columns: 1fr;
+  }
+  .flow { grid-template-columns: 1fr; }
+  .step::after { display: none; }
+}
+
+@media (prefers-reduced-motion: reduce) {
+  *, *::before, *::after {
+    animation-duration: 0.01ms !important;
+    animation-iteration-count: 1 !important;
+    transition-duration: 0.01ms !important;
+    scroll-behavior: auto !important;
+  }
+}
diff --git a/evaluation_dashboard_app/docs/guide/visual_systems.html b/evaluation_dashboard_app/docs/guide/visual_systems.html
new file mode 100644
index 0000000..384021c
--- /dev/null
+++ b/evaluation_dashboard_app/docs/guide/visual_systems.html
@@ -0,0 +1,555 @@
+<!doctype html>
+<html lang="en">
+<head>
+  <meta charset="utf-8" />
+  <meta name="viewport" content="width=device-width, initial-scale=1" />
+  <title>Evaluation Dashboard Technical Diagrams</title>
+  <link rel="stylesheet" href="styles.css" />
+</head>
+<body>
+  <header class="hero compact">
+    <div class="shell">
+      <div class="eyebrow"><span class="signal"></span>Technical Diagrams</div>
+      <h1>Real Flows</h1>
+      <p class="lead">
+        Precise diagrams for the parts users and maintainers actually ask about: download/eval execution,
+        compare-mode state, page artifact dependencies, T4 camera rendering, T4 Three.js 3D overlays,
+        production queueing, and report generation.
+      </p>
+    </div>
+  </header>
+  <nav>
+    <div class="shell nav-inner">
+      <a href="index.html">Home</a>
+      <a href="getting_started.html">How to Use</a>
+      <a href="pages.html">Pages</a>
+      <a href="data_reports.html">Data & Reports</a>
+      <a href="specsheet.html">Specsheet</a>
+      <a href="deployment.html">Deployment</a>
+      <a class="active" href="visual_systems.html">Diagrams</a>
+    </div>
+  </nav>
+
+  <main>
+    <section id="artifact-map" class="reveal">
+      <div class="shell">
+        <div class="section-head">
+          <div class="kicker">Artifact Dependency Map</div>
+          <h2>Which generated files unlock which pages?</h2>
+          <p class="lead">
+            This is the first diagram to check when a user asks why a page is empty.
+            Most UI behavior follows directly from whether these files exist under the selected run.
+          </p>
+        </div>
+        <div class="real-diagram">
+          <svg viewBox="0 0 1200 610" role="img" aria-label="Artifact dependency map">
+            <defs>
+              <marker id="arrow-blue" markerWidth="10" markerHeight="8" refX="9" refY="4" orient="auto">
+                <path d="M0,0 L10,4 L0,8 z" fill="#2563eb"></path>
+              </marker>
+              <marker id="arrow-teal" markerWidth="10" markerHeight="8" refX="9" refY="4" orient="auto">
+                <path d="M0,0 L10,4 L0,8 z" fill="#0f766e"></path>
+              </marker>
+            </defs>
+            <text x="40" y="42" class="svg-title">Run folder artifact dependency map</text>
+
+            <rect x="40" y="80" width="210" height="410" rx="10" class="svg-note"></rect>
+            <text x="70" y="118" class="svg-actor-text">data/&lt;run&gt;/</text>
+            <text x="70" y="152" class="svg-note-text">Summary.csv</text>
+            <text x="70" y="192" class="svg-note-text">Score.csv</text>
+            <text x="70" y="232" class="svg-note-text">*.parquet</text>
+            <text x="70" y="272" class="svg-note-text">result.txt / score.json</text>
+            <text x="70" y="312" class="svg-note-text">resources/metadata.yaml</text>
+            <text x="70" y="340" class="svg-note-text">resources/summary.json</text>
+            <text x="70" y="380" class="svg-note-text">specsheet/*.png</text>
+            <text x="70" y="408" class="svg-note-text">specsheet/specsheet.pdf</text>
+
+            <rect x="390" y="70" width="230" height="70" rx="10" class="svg-actor"></rect>
+            <text x="425" y="101" class="svg-actor-text">Overview</text>
+            <text x="425" y="122" class="svg-small">run selection + summary</text>
+
+            <rect x="390" y="165" width="230" height="70" rx="10" class="svg-actor"></rect>
+            <text x="425" y="196" class="svg-actor-text">TP Summary</text>
+            <text x="425" y="217" class="svg-small">TP/RMS/velocity charts</text>
+
+            <rect x="390" y="260" width="230" height="70" rx="10" class="svg-actor"></rect>
+            <text x="425" y="291" class="svg-actor-text">Criteria Score</text>
+            <text x="425" y="312" class="svg-small">pass rate + gates</text>
+
+            <rect x="730" y="70" width="250" height="70" rx="10" class="svg-actor"></rect>
+            <text x="765" y="101" class="svg-actor-text">Detection Stats</text>
+            <text x="765" y="122" class="svg-small">DuckDB + parquet scan</text>
+
+            <rect x="730" y="165" width="250" height="70" rx="10" class="svg-actor"></rect>
+            <text x="765" y="196" class="svg-actor-text">Bounding Box / T4 3D</text>
+            <text x="765" y="217" class="svg-small">BEV + Three.js overlays</text>
+
+            <rect x="730" y="260" width="250" height="70" rx="10" class="svg-actor"></rect>
+            <text x="765" y="291" class="svg-actor-text">Prediction Evaluation</text>
+            <text x="765" y="312" class="svg-small">ADE/FDE matrices</text>
+
+            <rect x="730" y="355" width="250" height="70" rx="10" class="svg-actor"></rect>
+            <text x="765" y="386" class="svg-actor-text">Trend Insights</text>
+            <text x="765" y="407" class="svg-small">release metadata + summaries</text>
+
+            <rect x="390" y="355" width="230" height="70" rx="10" class="svg-actor"></rect>
+            <text x="425" y="386" class="svg-actor-text">Download / Workflow</text>
+            <text x="425" y="407" class="svg-small">creates / refreshes files</text>
+
+            <rect x="730" y="450" width="250" height="70" rx="10" class="svg-actor"></rect>
+            <text x="765" y="481" class="svg-actor-text">Reports</text>
+            <text x="765" y="502" class="svg-small">dashboard PDF + specsheet</text>
+
+            <path d="M250 152 C310 130,330 110,390 105" class="svg-msg"></path>
+            <path d="M250 152 C315 165,330 190,390 200" class="svg-msg"></path>
+            <path d="M250 192 C315 210,330 275,390 295" class="svg-msg"></path>
+            <path d="M250 232 C470 95,565 95,730 105" class="svg-msg"></path>
+            <path d="M250 232 C470 195,565 195,730 200" class="svg-msg"></path>
+            <path d="M250 232 C470 300,565 300,730 295" class="svg-msg"></path>
+            <path d="M250 326 C470 370,565 382,730 390" class="svg-msg"></path>
+            <path d="M250 394 C470 470,565 480,730 485" class="svg-msg"></path>
+            <path d="M505 355 C420 285,360 230,250 200" class="svg-msg-return"></path>
+            <text x="280" y="545" class="svg-note-text">Blue = page reads artifact. Dashed green = workflow produces or refreshes artifact.</text>
+          </svg>
+        </div>
+      </div>
+    </section>
+
+    <section id="download-sequence" class="reveal">
+      <div class="shell">
+        <div class="section-head">
+          <div class="kicker">Real Sequence</div>
+          <h2>Download Results -> Eval Results -> Overview selection.</h2>
+          <p class="lead">
+            This sequence shows both inline and production task-queue modes. In production, the UI does not run long jobs directly.
+          </p>
+        </div>
+        <div class="real-diagram">
+          <svg viewBox="0 0 1220 760" role="img" aria-label="Download and eval sequence diagram">
+            <defs>
+              <marker id="arrow-blue" markerWidth="10" markerHeight="8" refX="9" refY="4" orient="auto">
+                <path d="M0,0 L10,4 L0,8 z" fill="#2563eb"></path>
+              </marker>
+              <marker id="arrow-teal" markerWidth="10" markerHeight="8" refX="9" refY="4" orient="auto">
+                <path d="M0,0 L10,4 L0,8 z" fill="#0f766e"></path>
+              </marker>
+            </defs>
+            <text x="40" y="40" class="svg-title">Sequence: user downloads a job, generates CSV artifacts, then opens Overview</text>
+
+            <g>
+              <rect x="40" y="70" width="150" height="58" rx="8" class="svg-actor"></rect>
+              <text x="82" y="102" class="svg-actor-text">User</text>
+              <line x1="115" y1="128" x2="115" y2="710" class="svg-line"></line>
+            </g>
+            <g>
+              <rect x="240" y="70" width="170" height="58" rx="8" class="svg-actor"></rect>
+              <text x="278" y="95" class="svg-actor-text">Streamlit</text>
+              <text x="282" y="116" class="svg-small">Download page</text>
+              <line x1="325" y1="128" x2="325" y2="710" class="svg-line"></line>
+            </g>
+            <g>
+              <rect x="465" y="70" width="160" height="58" rx="8" class="svg-actor"></rect>
+              <text x="497" y="95" class="svg-actor-text">Redis / RQ</text>
+              <text x="507" y="116" class="svg-small">queue mode</text>
+              <line x1="545" y1="128" x2="545" y2="710" class="svg-line"></line>
+            </g>
+            <g>
+              <rect x="680" y="70" width="160" height="58" rx="8" class="svg-actor"></rect>
+              <text x="724" y="95" class="svg-actor-text">Worker</text>
+              <text x="710" y="116" class="svg-small">heavy tasks</text>
+              <line x1="760" y1="128" x2="760" y2="710" class="svg-line"></line>
+            </g>
+            <g>
+              <rect x="895" y="70" width="160" height="58" rx="8" class="svg-actor"></rect>
+              <text x="930" y="95" class="svg-actor-text">Evaluator</text>
+              <text x="934" y="116" class="svg-small">API / files</text>
+              <line x1="975" y1="128" x2="975" y2="710" class="svg-line"></line>
+            </g>
+            <g>
+              <rect x="1100" y="70" width="90" height="58" rx="8" class="svg-actor"></rect>
+              <text x="1120" y="95" class="svg-actor-text">Data</text>
+              <text x="1117" y="116" class="svg-small">root</text>
+              <line x1="1145" y1="128" x2="1145" y2="710" class="svg-line"></line>
+            </g>
+
+            <path d="M115 165 L325 165" class="svg-msg"></path>
+            <text x="140" y="156" class="svg-step">1. Submit Project ID, Job ID, output path</text>
+
+            <path d="M325 215 L545 215" class="svg-msg"></path>
+            <text x="350" y="206" class="svg-step">2a. Queue task if USE_TASK_QUEUE=true</text>
+            <path d="M545 255 L760 255" class="svg-msg"></path>
+            <text x="570" y="246" class="svg-step">2b. Worker consumes RQ job</text>
+
+            <rect x="268" y="284" width="235" height="54" rx="8" class="svg-note-warn"></rect>
+            <text x="284" y="306" class="svg-note-text">If queue mode is off, Streamlit</text>
+            <text x="284" y="324" class="svg-note-text">runs this work inline.</text>
+
+            <path d="M760 370 L975 370" class="svg-msg"></path>
+            <text x="785" y="361" class="svg-step">3. Download archives / result JSON / scenario data</text>
+            <path d="M975 410 L760 410" class="svg-msg-return"></path>
+            <text x="790" y="401" class="svg-step">4. API response / downloaded files</text>
+
+            <path d="M760 465 L1145 465" class="svg-msg"></path>
+            <text x="790" y="456" class="svg-step">5. Write result.txt, score.json, extracted archives</text>
+
+            <path d="M115 520 L325 520" class="svg-msg"></path>
+            <text x="140" y="511" class="svg-step">6. User runs Eval Results for same root</text>
+            <path d="M760 565 L1145 565" class="svg-msg"></path>
+            <text x="790" y="556" class="svg-step">7. Generate Summary.csv + Score.csv</text>
+            <path d="M325 620 L1145 620" class="svg-msg-return"></path>
+            <text x="350" y="611" class="svg-step">8. Overview lists data/&lt;run&gt; and reads generated artifacts</text>
+          </svg>
+        </div>
+      </div>
+    </section>
+
+    <section id="t4-camera-render" class="reveal">
+      <div class="shell">
+        <div class="section-head">
+          <div class="kicker">T4 Camera Rendering</div>
+          <h2>Bounding Box Viewer / T4 Dataset Server: HTTP render path.</h2>
+          <p class="lead">
+            This path is for camera PNGs or HTML camera render previews. It is separate from the Three.js 3D overlay path below.
+          </p>
+        </div>
+        <div class="real-diagram">
+          <svg viewBox="0 0 1220 680" role="img" aria-label="T4 camera render sequence">
+            <defs>
+              <marker id="arrow-blue" markerWidth="10" markerHeight="8" refX="9" refY="4" orient="auto">
+                <path d="M0,0 L10,4 L0,8 z" fill="#2563eb"></path>
+              </marker>
+              <marker id="arrow-teal" markerWidth="10" markerHeight="8" refX="9" refY="4" orient="auto">
+                <path d="M0,0 L10,4 L0,8 z" fill="#0f766e"></path>
+              </marker>
+            </defs>
+            <text x="40" y="40" class="svg-title">Sequence: camera preview render through T4 visualizer HTTP API</text>
+
+            <rect x="45" y="78" width="150" height="58" rx="8" class="svg-actor"></rect>
+            <text x="82" y="110" class="svg-actor-text">Streamlit</text>
+            <line x1="120" y1="136" x2="120" y2="625" class="svg-line"></line>
+            <rect x="265" y="78" width="170" height="58" rx="8" class="svg-actor"></rect>
+            <text x="294" y="103" class="svg-actor-text">T4 Client</text>
+            <text x="290" y="124" class="svg-small">requests wrapper</text>
+            <line x1="350" y1="136" x2="350" y2="625" class="svg-line"></line>
+            <rect x="505" y="78" width="170" height="58" rx="8" class="svg-actor"></rect>
+            <text x="533" y="103" class="svg-actor-text">T4 Server</text>
+            <text x="535" y="124" class="svg-small">FastAPI</text>
+            <line x1="590" y1="136" x2="590" y2="625" class="svg-line"></line>
+            <rect x="745" y="78" width="170" height="58" rx="8" class="svg-actor"></rect>
+            <text x="778" y="103" class="svg-actor-text">Dataset</text>
+            <text x="770" y="124" class="svg-small">local T4 files</text>
+            <line x1="830" y1="136" x2="830" y2="625" class="svg-line"></line>
+            <rect x="985" y="78" width="170" height="58" rx="8" class="svg-actor"></rect>
+            <text x="1028" y="103" class="svg-actor-text">Browser</text>
+            <text x="1012" y="124" class="svg-small">PNG / iframe</text>
+            <line x1="1070" y1="136" x2="1070" y2="625" class="svg-line"></line>
+
+            <path d="M120 175 L350 175" class="svg-msg"></path>
+            <text x="145" y="166" class="svg-step">1. User selects server base URL, dataset, scenario, frame</text>
+            <path d="M350 220 L590 220" class="svg-msg"></path>
+            <text x="375" y="211" class="svg-step">2. GET /health, /datasets, /datasets/{id}/scenarios</text>
+            <path d="M590 260 L830 260" class="svg-msg"></path>
+            <text x="615" y="251" class="svg-step">3. Server reads available datasets and scene metadata</text>
+            <path d="M590 305 L350 305" class="svg-msg-return"></path>
+            <text x="375" y="296" class="svg-step">4. JSON lists: ids, scenarios, frame counts</text>
+
+            <path d="M120 365 L350 365" class="svg-msg"></path>
+            <text x="145" y="356" class="svg-step">5. Build RenderRequest from UI and optional GT rows</text>
+            <path d="M350 410 L590 410" class="svg-msg"></path>
+            <text x="375" y="401" class="svg-step">6. POST /render {dataset, scenario, frame, target_objects}</text>
+            <path d="M590 455 L830 455" class="svg-msg"></path>
+            <text x="615" y="446" class="svg-step">7. Load camera/sample data and draw annotations</text>
+            <path d="M590 505 L350 505" class="svg-msg-return"></path>
+            <text x="375" y="496" class="svg-step">8. RenderResult JSON with images[].png_base64</text>
+            <path d="M350 555 L1070 555" class="svg-msg-return"></path>
+            <text x="375" y="546" class="svg-step">9. Streamlit decodes/displays PNGs or embeds /render/html iframe</text>
+          </svg>
+        </div>
+        <div class="payload-grid">
+          <div class="payload-card">
+            <h3>RenderRequest body</h3>
+            <pre>{
+  "t4dataset_id": "...",
+  "scenario_name": "...",
+  "frame_index": 42,
+  "target_objects": [{ "uuid": "...", "x": 1.2 }],
+  "show_annotations": true,
+  "crop_cameras": false
+}</pre>
+          </div>
+          <div class="payload-card">
+            <h3>RenderResult response</h3>
+            <pre>{
+  "sample_token": "...",
+  "timestamp_us": 123,
+  "images": [
+    { "label": "CAM_FRONT", "png_base64": "..." }
+  ],
+  "elapsed_ms": 812.4
+}</pre>
+          </div>
+          <div class="payload-card">
+            <h3>Source code touchpoints</h3>
+            <pre>lib/t4_visualizer_client.py
+lib/t4_dataset_embed.py
+pages/11_T4_Dataset_Server.py
+pages/4_Bounding_Box_Viewer.py</pre>
+          </div>
+        </div>
+      </div>
+    </section>
+
+    <section id="t4-three-viewer" class="reveal">
+      <div class="shell">
+        <div class="section-head">
+          <div class="kicker">T4 3D Rendering</div>
+          <h2>Three.js overlay path: parquet -> all-frame layers -> iframe postMessage.</h2>
+          <p class="lead">
+            This is the precise flow used by <code>pages/5_T4_3D_Viewer.py</code>. The app deliberately uses
+            the viewer’s own frame slider: Streamlit loads the iframe once, sends all frame overlays, and the
+            viewer selects overlays internally as the user scrubs time.
+          </p>
+        </div>
+        <div class="real-diagram">
+          <svg viewBox="0 0 1280 900" role="img" aria-label="T4 Three.js 3D sequence diagram">
+            <defs>
+              <marker id="arrow-blue" markerWidth="10" markerHeight="8" refX="9" refY="4" orient="auto">
+                <path d="M0,0 L10,4 L0,8 z" fill="#2563eb"></path>
+              </marker>
+              <marker id="arrow-teal" markerWidth="10" markerHeight="8" refX="9" refY="4" orient="auto">
+                <path d="M0,0 L10,4 L0,8 z" fill="#0f766e"></path>
+              </marker>
+            </defs>
+            <text x="40" y="40" class="svg-title">Sequence: T4 3D Viewer iframe + postMessage overlay synchronization</text>
+
+            <rect x="35" y="76" width="145" height="60" rx="8" class="svg-actor"></rect>
+            <text x="74" y="101" class="svg-actor-text">User</text>
+            <text x="58" y="122" class="svg-small">browser</text>
+            <line x1="107" y1="136" x2="107" y2="850" class="svg-line"></line>
+
+            <rect x="220" y="76" width="165" height="60" rx="8" class="svg-actor"></rect>
+            <text x="253" y="101" class="svg-actor-text">Streamlit</text>
+            <text x="240" y="122" class="svg-small">5_T4_3D_Viewer</text>
+            <line x1="302" y1="136" x2="302" y2="850" class="svg-line"></line>
+
+            <rect x="425" y="76" width="155" height="60" rx="8" class="svg-actor"></rect>
+            <text x="468" y="101" class="svg-actor-text">DuckDB</text>
+            <text x="448" y="122" class="svg-small">parquet_scan</text>
+            <line x1="502" y1="136" x2="502" y2="850" class="svg-line"></line>
+
+            <rect x="620" y="76" width="165" height="60" rx="8" class="svg-actor"></rect>
+            <text x="654" y="101" class="svg-actor-text">Layer Builder</text>
+            <text x="642" y="122" class="svg-small">t4_three_layers</text>
+            <line x1="702" y1="136" x2="702" y2="850" class="svg-line"></line>
+
+            <rect x="825" y="76" width="165" height="60" rx="8" class="svg-actor"></rect>
+            <text x="858" y="101" class="svg-actor-text">T4 Server</text>
+            <text x="842" y="122" class="svg-small">/viewer/three</text>
+            <line x1="907" y1="136" x2="907" y2="850" class="svg-line"></line>
+
+            <rect x="1030" y="76" width="195" height="60" rx="8" class="svg-actor"></rect>
+            <text x="1074" y="101" class="svg-actor-text">Three.js iframe</text>
+            <text x="1060" y="122" class="svg-small">viewer runtime</text>
+            <line x1="1127" y1="136" x2="1127" y2="850" class="svg-line"></line>
+
+            <path d="M107 170 L302 170" class="svg-msg"></path>
+            <text x="128" y="161" class="svg-step">1. Open T4 3D Viewer after Overview selected run(s)</text>
+
+            <path d="M302 215 L502 215" class="svg-msg"></path>
+            <text x="325" y="206" class="svg-step">2. DESCRIBE + SELECT parquet_scan(?) with filters</text>
+
+            <path d="M502 260 L302 260" class="svg-msg-return"></path>
+            <text x="325" y="251" class="svg-step">3. DataFrame rows: frame_index, source GT/EST, status, geometry</text>
+
+            <rect x="225" y="292" width="330" height="78" rx="8" class="svg-note"></rect>
+            <text x="242" y="315" class="svg-note-text">Filters come from shared BEV keys:</text>
+            <text x="242" y="335" class="svg-note-text">suite, scenario, t4dataset, topic, label, visibility, runs.</text>
+            <text x="242" y="355" class="svg-note-text">frame_index is normalized to int.</text>
+
+            <path d="M302 400 L907 400" class="svg-msg"></path>
+            <text x="325" y="391" class="svg-step">4. GET /datasets/{t4dataset_id}/availability</text>
+            <path d="M907 445 L302 445" class="svg-msg-return"></path>
+            <text x="325" y="436" class="svg-step">5. { available: true/false, dataset path metadata }</text>
+
+            <path d="M302 500 L702 500" class="svg-msg"></path>
+            <text x="325" y="491" class="svg-step">6. build_three_layer_payload_all_frames(df)</text>
+
+            <rect x="624" y="527" width="290" height="112" rx="8" class="svg-note-warn"></rect>
+            <text x="642" y="550" class="svg-note-text">For each frame_index:</text>
+            <text x="642" y="570" class="svg-note-text">source == GT -> gt[] boxes</text>
+            <text x="642" y="590" class="svg-note-text">source == EST -> pred[] boxes</text>
+            <text x="642" y="610" class="svg-note-text">TP pair_uuid/uuid -> matched_pairs[]</text>
+
+            <path d="M702 670 L302 670" class="svg-msg-return"></path>
+            <text x="325" y="661" class="svg-step">7. Payload: { type: "bbox_layers_by_frame", frames: { "0": ... } }</text>
+
+            <path d="M302 720 L907 720" class="svg-msg"></path>
+            <text x="325" y="711" class="svg-step">8. iframe src = {base}/viewer/three?t4dataset_id=...&amp;scenario_name=...&amp;frame_index=min</text>
+
+            <path d="M907 760 L1127 760" class="svg-msg"></path>
+            <text x="930" y="751" class="svg-step">9. Viewer loads dataset/scenario and its own time slider</text>
+
+            <path d="M302 805 L1127 805" class="svg-msg"></path>
+            <text x="325" y="796" class="svg-step">10. JS hex-decodes payload and iframe.contentWindow.postMessage(payload, targetOrigin)</text>
+
+            <path d="M1127 840 C1160 840,1160 810,1127 810" class="svg-msg-return"></path>
+            <text x="845" y="835" class="svg-step">11. User scrubs inside viewer; runtime selects frames[frame_index] without Streamlit rerun</text>
+          </svg>
+        </div>
+
+        <div class="payload-grid">
+          <div class="payload-card">
+            <h3>Layer payload</h3>
+            <pre>{
+  "type": "bbox_layers_by_frame",
+  "frames": {
+    "42": {
+      "gt": [{ "x": 1.0, "source": "GT" }],
+      "pred": [{ "x": 1.2, "source": "EST" }],
+      "matched_pairs": [
+        { "gt_idx": 0, "pred_idx": 0, "pair_uuid": "..." }
+      ]
+    }
+  }
+}</pre>
+          </div>
+          <div class="payload-card">
+            <h3>Iframe URL</h3>
+            <pre>viewer_three_url =
+  T4_VISUALIZER_BASE_URL
+  + "/viewer/three?"
+  + "t4dataset_id=..."
+  + "&amp;scenario_name=..."
+  + "&amp;frame_index=min_frame"</pre>
+          </div>
+          <div class="payload-card">
+            <h3>Post timing</h3>
+            <pre>post("iframe-load")
+retry every 250ms up to 12 times
+post("initial-delay-300ms")
+post("initial-delay-1200ms")
+
+targetOrigin = new URL(iframe.src).origin</pre>
+          </div>
+        </div>
+
+        <div class="callout warn" style="margin-top:20px">
+          <strong>Key distinction:</strong> the 3D viewer does not call <code>POST /render</code> for every frame.
+          It embeds <code>/viewer/three</code> once and sends all-frame overlay data via <code>postMessage</code>.
+          Camera PNG rendering is a separate HTTP render path.
+        </div>
+      </div>
+    </section>
+
+    <section id="compare-state" class="reveal">
+      <div class="shell">
+        <div class="section-head">
+          <div class="kicker">Compare Mode State</div>
+          <h2>Overview is the state source for comparison pages.</h2>
+        </div>
+        <div class="real-diagram">
+          <svg viewBox="0 0 1180 520" role="img" aria-label="Compare mode state diagram">
+            <defs>
+              <marker id="arrow-blue" markerWidth="10" markerHeight="8" refX="9" refY="4" orient="auto">
+                <path d="M0,0 L10,4 L0,8 z" fill="#2563eb"></path>
+              </marker>
+            </defs>
+            <text x="40" y="40" class="svg-title">Compare mode data/state propagation</text>
+
+            <rect x="50" y="95" width="240" height="300" rx="12" class="svg-note"></rect>
+            <text x="82" y="130" class="svg-actor-text">Overview</text>
+            <text x="82" y="165" class="svg-note-text">mode = Compare Mode</text>
+            <text x="82" y="195" class="svg-note-text">runA = Baseline</text>
+            <text x="82" y="225" class="svg-note-text">runB / all_runs = Candidates</text>
+            <text x="82" y="255" class="svg-note-text">run_labels = A, B, C...</text>
+            <text x="82" y="285" class="svg-note-text">label filters</text>
+            <text x="82" y="315" class="svg-note-text">query params run_a/run_b</text>
+
+            <rect x="420" y="95" width="260" height="300" rx="12" class="svg-note-warn"></rect>
+            <text x="455" y="130" class="svg-actor-text">Session State + URL Hydration</text>
+            <text x="455" y="165" class="svg-note-text">st.session_state stores run objects</text>
+            <text x="455" y="195" class="svg-note-text">overview_url_hydrate can rebuild</text>
+            <text x="455" y="225" class="svg-note-text">state from query parameters</text>
+            <text x="455" y="255" class="svg-note-text">important with multiple Streamlit</text>
+            <text x="455" y="285" class="svg-note-text">replicas or direct subpage links</text>
+
+            <rect x="810" y="65" width="300" height="360" rx="12" class="svg-note"></rect>
+            <text x="845" y="100" class="svg-actor-text">Pages consume shared state</text>
+            <text x="845" y="140" class="svg-note-text">TP Summary -> ΔTP / metric deltas</text>
+            <text x="845" y="175" class="svg-note-text">Criteria -> pass-rate / gate deltas</text>
+            <text x="845" y="210" class="svg-note-text">Detection Stats -> status + distance diffs</text>
+            <text x="845" y="245" class="svg-note-text">Bounding Box -> side-by-side/overlay BEV</text>
+            <text x="845" y="280" class="svg-note-text">T4 3D -> selected run layers</text>
+            <text x="845" y="315" class="svg-note-text">Prediction -> ADE/FDE delta matrices</text>
+
+            <path d="M290 210 L420 210" class="svg-msg"></path>
+            <text x="310" y="198" class="svg-step">write</text>
+            <path d="M680 210 L810 210" class="svg-msg"></path>
+            <text x="705" y="198" class="svg-step">read</text>
+          </svg>
+        </div>
+      </div>
+    </section>
+
+    <section id="reports" class="reveal">
+      <div class="shell">
+        <div class="section-head">
+          <div class="kicker">Report Generation</div>
+          <h2>Dashboard PDF and specsheet PDF are different engines.</h2>
+        </div>
+        <div class="real-diagram">
+          <svg viewBox="0 0 1180 520" role="img" aria-label="Report generation diagram">
+            <defs>
+              <marker id="arrow-blue" markerWidth="10" markerHeight="8" refX="9" refY="4" orient="auto">
+                <path d="M0,0 L10,4 L0,8 z" fill="#2563eb"></path>
+              </marker>
+              <marker id="arrow-teal" markerWidth="10" markerHeight="8" refX="9" refY="4" orient="auto">
+                <path d="M0,0 L10,4 L0,8 z" fill="#0f766e"></path>
+              </marker>
+            </defs>
+            <text x="40" y="40" class="svg-title">Two report paths</text>
+
+            <rect x="60" y="95" width="220" height="110" rx="12" class="svg-actor"></rect>
+            <text x="94" y="130" class="svg-actor-text">Overview selection</text>
+            <text x="94" y="154" class="svg-small">mode, run(s), filters</text>
+            <text x="94" y="176" class="svg-small">Summary.csv charts</text>
+
+            <rect x="410" y="70" width="260" height="160" rx="12" class="svg-note"></rect>
+            <text x="445" y="105" class="svg-actor-text">Dashboard PDF</text>
+            <text x="445" y="140" class="svg-note-text">lib/overview_pdf_report.py</text>
+            <text x="445" y="170" class="svg-note-text">curated dashboard snapshot</text>
+            <text x="445" y="200" class="svg-note-text">current view + selected filters</text>
+
+            <rect x="410" y="290" width="260" height="160" rx="12" class="svg-note-warn"></rect>
+            <text x="445" y="325" class="svg-actor-text">Release Specsheet PDF</text>
+            <text x="445" y="360" class="svg-note-text">lib/specsheet_report.py</text>
+            <text x="445" y="390" class="svg-note-text">perception_catalog_analyzer</text>
+            <text x="445" y="420" class="svg-note-text">blocks + trend plots + template</text>
+
+            <rect x="820" y="120" width="250" height="85" rx="12" class="svg-actor"></rect>
+            <text x="868" y="155" class="svg-actor-text">overview_report.pdf</text>
+            <text x="868" y="178" class="svg-small">dashboard narrative</text>
+
+            <rect x="820" y="335" width="250" height="85" rx="12" class="svg-actor"></rect>
+            <text x="875" y="370" class="svg-actor-text">specsheet.pdf</text>
+            <text x="875" y="393" class="svg-small">release specsheet</text>
+
+            <path d="M280 150 L410 150" class="svg-msg"></path>
+            <path d="M670 150 L820 160" class="svg-msg"></path>
+            <path d="M280 175 C350 250,350 330,410 370" class="svg-msg"></path>
+            <path d="M670 370 L820 377" class="svg-msg"></path>
+            <text x="85" y="485" class="svg-note-text">Specsheet is an advanced report path. Most users first use the dashboard pages and dashboard PDF.</text>
+            <text x="650" y="485" class="svg-note-text">For full detail, open the Specsheet guide page.</text>
+          </svg>
+        </div>
+        <div class="actions">
+          <a class="button" href="specsheet.html">Open Specsheet Details</a>
+        </div>
+      </div>
+    </section>
+  </main>
+
+  <footer class="footer"><div class="shell"><a href="index.html">Back to guide home</a></div></footer>
+  <script src="guide.js"></script>
+</body>
+</html>
diff --git a/evaluation_dashboard_app/docs/specsheet_pipeline_explainer.html b/evaluation_dashboard_app/docs/specsheet_pipeline_explainer.html
new file mode 100644
index 0000000..0fb205f
--- /dev/null
+++ b/evaluation_dashboard_app/docs/specsheet_pipeline_explainer.html
@@ -0,0 +1,13 @@
+<!doctype html>
+<html lang="en">
+<head>
+  <meta charset="utf-8" />
+  <meta name="viewport" content="width=device-width, initial-scale=1" />
+  <meta http-equiv="refresh" content="0; url=guide/specsheet.html" />
+  <title>Specsheet Details</title>
+  <link rel="canonical" href="guide/specsheet.html" />
+</head>
+<body>
+  <p><a href="guide/specsheet.html">Open Specsheet Details</a></p>
+</body>
+</html>
diff --git a/evaluation_dashboard_app/lib/auth.py b/evaluation_dashboard_app/lib/auth.py
index b15d5dc..c29c6eb 100644
--- a/evaluation_dashboard_app/lib/auth.py
+++ b/evaluation_dashboard_app/lib/auth.py
@@ -4,8 +4,10 @@
 a header with the user identity. When enabled, users see only their own tasks.
 """
 
+import base64
+import json
 import os
-from typing import Optional
+from typing import Any, Dict, Optional
 
 # Header name set by auth proxy (e.g. X-Forwarded-User, X-Auth-User). Empty = no auth filtering.
 AUTH_USER_HEADER = os.environ.get("AUTH_USER_HEADER", "").strip()
@@ -14,6 +16,108 @@
 AUTH_DEFAULT_USER = os.environ.get("AUTH_DEFAULT_USER", "").strip() or None
 
 
+def _first_nonempty_string(*values: Any) -> str:
+    """Return the first value whose stripped text is non-empty, else an empty string."""
+    # Falsy inputs (None, "", 0) collapse to "" before stripping, so they never win.
+    candidates = (str(item or "").strip() for item in values)
+    # Scan lazily in argument order and stop at the first non-blank hit.
+    winner = next((text for text in candidates if text), "")
+    return winner
+
+
+def _read_streamlit_headers() -> Dict[str, str]:
+    """Best-effort copy of the request headers from ``st.context``; {} when unavailable."""
+    try:
+        import streamlit as st
+
+        ctx = getattr(st, "context", None)
+        headers = getattr(ctx, "headers", None) if ctx else None
+        if callable(headers):
+            headers = headers()
+        # ``st.context.headers`` is documented as a read-only dict-like object, not a
+        # ``dict`` subclass, so duck-type on ``.items()`` instead of ``isinstance(..., dict)``.
+        if callable(getattr(headers, "items", None)):
+            return {
+                key: str(value) for key, value in headers.items() if isinstance(key, str)
+            }
+    except Exception:
+        # Deliberate best-effort: any failure to reach the headers yields {}.
+        pass
+    return {}
+
+
+def _decode_jwt_payload(token: str) -> Dict[str, Any]:
+    """Decode the claims segment of a JWT for display; the signature is NOT verified."""
+    segments = str(token or "").strip().split(".")
+    # An empty token splits to [""], so one length check rejects both blank and
+    # dot-less input; a JWT needs at least "<header>.<payload>".
+    if len(segments) < 2:
+        return {}
+    body = segments[1]
+    # JWTs drop base64 "=" padding; restore it to a multiple of four characters.
+    body += "=" * (-len(body) % 4)
+    try:
+        claims = json.loads(base64.urlsafe_b64decode(body).decode("utf-8"))
+    except Exception:
+        return {}
+    # Only a JSON object is a usable claims set; arrays/scalars are discarded.
+    return claims if isinstance(claims, dict) else {}
+
+
+def _extract_identity_from_bearer_token(headers: Dict[str, str]) -> Dict[str, Any]:
+    """Extract subject / email / username / display name from unverified bearer token claims."""
+    authz = str(headers.get("Authorization") or headers.get("authorization") or "").strip()
+    if not authz.lower().startswith("bearer "):
+        return {}
+    token = authz.split(" ", 1)[1].strip()
+    payload = _decode_jwt_payload(token)
+    if not payload:
+        return {}
+
+    def _as_dict(value: Any) -> Dict[str, Any]:
+        # Claim shapes vary by issuer: a nested claim may be a string, list or None.
+        return value if isinstance(value, dict) else {}
+
+    session = _as_dict(payload.get("session"))
+    identity = _as_dict(session.get("identity"))
+    traits = _as_dict(identity.get("traits"))
+    # ``traits.name`` only contributes first/last parts when it is an object.
+    name = _as_dict(traits.get("name"))
+    oauth_username = _first_nonempty_string(
+        payload.get("preferred_username"),
+        payload.get("username"),
+        payload.get("upn"),
+        payload.get("unique_name"),
+        payload.get("cognito:username"),
+        traits.get("username"),
+        identity.get("username"),
+    )
+    full_name = " ".join(
+        part for part in [str(name.get("first") or "").strip(), str(name.get("last") or "").strip()] if part
+    ).strip()
+    display_name = _first_nonempty_string(
+        payload.get("name"),
+        full_name,
+        traits.get("display_name"),
+        identity.get("display_name"),
+        oauth_username,
+        traits.get("email"),
+    )
+    email = _first_nonempty_string(
+        payload.get("email"), payload.get("upn"), traits.get("email"), identity.get("email")
+    )
+    subject_id = _first_nonempty_string(
+        payload.get("sub"), _as_dict(session.get("account")).get("subject_id"), identity.get("id")
+    )
+    return {
+        "subject_id": subject_id,
+        "email": email,
+        "username": oauth_username,
+        "name": display_name,
+        "claims": payload,
+    }
+
+
 def get_current_user_id() -> Optional[str]:
     """
     Return the current user identifier, or None if auth is not configured.
@@ -24,19 +128,10 @@ def get_current_user_id() -> Optional[str]:
     """
     if not AUTH_USER_HEADER and not AUTH_DEFAULT_USER:
         return None
-    # Try to read header (Streamlit 1.37+)
-    try:
-        import streamlit as st
-        ctx = getattr(st, "context", None)
-        headers = getattr(ctx, "headers", None) if ctx else None
-        if callable(headers):
-            headers = headers()
-        if isinstance(headers, dict):
-            value = headers.get(AUTH_USER_HEADER) or headers.get(AUTH_USER_HEADER.lower())
-            if value and isinstance(value, str) and value.strip():
-                return value.strip()
-    except Exception:
-        pass
+    headers = _read_streamlit_headers()
+    value = headers.get(AUTH_USER_HEADER) or headers.get(AUTH_USER_HEADER.lower())
+    if value and isinstance(value, str) and value.strip():
+        return value.strip()
     return AUTH_DEFAULT_USER
 
 
diff --git a/evaluation_dashboard_app/lib/criteria_absolute_gates.py b/evaluation_dashboard_app/lib/criteria_absolute_gates.py
index c23eaa9..2a10770 100644
--- a/evaluation_dashboard_app/lib/criteria_absolute_gates.py
+++ b/evaluation_dashboard_app/lib/criteria_absolute_gates.py
@@ -11,6 +11,8 @@
 
 import pandas as pd
 
+from lib.score_schema import score_base_cols, score_identity_cols
+
 MetricOp = Literal["<=", ">="]
 
 MAX_CRITERIA_DEFAULT = 32
@@ -22,11 +24,11 @@ def infer_criteria_count(
     max_criteria: int = MAX_CRITERIA_DEFAULT,
 ) -> int:
     """
-    Number of criteria blocks in a raw Score dataframe (first 3 cols are base).
+    Number of criteria blocks in a raw Score dataframe.
     """
     if df_raw is None or df_raw.shape[1] < 3:
         return 1
-    n = (df_raw.shape[1] - 3) // block_size
+    n = (df_raw.shape[1] - len(score_base_cols(df_raw))) // block_size
     n = max(1, n)
     return int(min(n, max_criteria))
 
@@ -65,7 +67,7 @@ def evaluate_scenario_gates(
         raise ValueError(f"Metric column {metric_gate.column!r} not in df_view")
 
     empty_cols = [
-        "Scenario",
+        *score_identity_cols(df_view),
         "agg_pass_rate",
         "metric_agg",
         "scenario_pass",
@@ -82,7 +84,12 @@ def evaluate_scenario_gates(
         d[metric_gate.column] = pd.to_numeric(d[metric_gate.column], errors="coerce")
 
     rows: list[dict[str, Any]] = []
-    for scen, grp in d.groupby("Scenario", observed=True):
+    identity_cols = score_identity_cols(d)
+    for key, grp in d.groupby(identity_cols, observed=True):
+        if len(identity_cols) == 1:
+            identity_values = {"Scenario": key[0] if isinstance(key, tuple) else key}
+        else:
+            identity_values = dict(zip(identity_cols, key))
         rc = len(grp)
         pr = grp["pass_rate"]
         mean_pr = float(pr.mean())
@@ -113,7 +120,7 @@ def evaluate_scenario_gates(
 
         rows.append(
             {
-                "Scenario": scen,
+                **identity_values,
                 "row_count": rc,
                 "agg_pass_rate": mean_pr,
                 "metric_agg": m_agg,
diff --git a/evaluation_dashboard_app/lib/db.py b/evaluation_dashboard_app/lib/db.py
index 7110d13..a678f08 100644
--- a/evaluation_dashboard_app/lib/db.py
+++ b/evaluation_dashboard_app/lib/db.py
@@ -29,6 +29,9 @@ def _task_log_timestamp_prefix() -> str:
     "run_eval_dirs",
     "generate_summary_csv",
     "build_parquet",
+    "download_and_eval",
+    "run_evaluator_and_process",
+    "run_release_specsheet_workflow",
 )
 TASK_STATUSES = ("pending", "running", "completed", "failed")
 
@@ -404,7 +407,7 @@ def update_task_result_summary(task_id: str, summary: Dict[str, Any]) -> bool:
 
 
 def get_task(task_id: str) -> Optional[Dict[str, Any]]:
-    """Return task row as dict (includes ``rq_job_id`` for RQ cancel / reconcile)."""
+    """Return task row as dict (includes ``rq_job_id`` and ``session_id`` when available)."""
     url = get_database_url()
     if not url:
         return None
@@ -420,7 +423,7 @@ def get_task(task_id: str) -> Optional[Dict[str, Any]]:
                 cur.execute(
                     """SELECT id, type, status, parameters, result_path, error_message,
                        progress_message, progress_pct, log_output, result_summary, rq_job_id,
-                       created_at, updated_at
+                       session_id, created_at, updated_at
                        FROM tasks WHERE id = %s""",
                     (task_id,),
                 )
@@ -436,14 +439,17 @@ def get_task(task_id: str) -> Optional[Dict[str, Any]]:
 
 def list_recent_tasks(
     limit: int = 50,
+    offset: int = 0,
     session_id: Optional[str] = None,
     since_days: Optional[int] = None,
+    include_details: bool = False,
 ) -> List[Dict[str, Any]]:
     """Return recent tasks (newest first).
 
     If ``session_id`` is set, only that user's tasks.
     If ``since_days`` is set, only tasks with ``created_at`` within that many calendar days
     (from DB ``NOW()``). ``limit`` still caps row count.
+    ``include_details`` includes heavy log/result payloads; task list cards do not need them.
     """
     url = get_database_url()
     if not url:
@@ -457,7 +463,12 @@ def list_recent_tasks(
         conn = psycopg2.connect(url)
         try:
             with conn.cursor(cursor_factory=RealDictCursor) as cur:
-                cols = "id, type, status, parameters, result_path, error_message, progress_message, progress_pct, log_output, result_summary, rq_job_id, created_at, updated_at"
+                cols = (
+                    "id, type, status, parameters, result_path, error_message, "
+                    "progress_message, progress_pct, rq_job_id, created_at, updated_at"
+                )
+                if include_details:
+                    cols += ", log_output, result_summary"
                 conditions: List[str] = []
                 params: List[Any] = []
                 if session_id is not None:
@@ -469,13 +480,13 @@ def list_recent_tasks(
                     )
                     params.append(int(since_days))
                 where = (" WHERE " + " AND ".join(conditions)) if conditions else ""
-                params.append(limit)
+                params.extend([max(0, int(limit)), max(0, int(offset))])
                 cur.execute(
                     f"""
                     SELECT {cols}
                     FROM tasks{where}
                     ORDER BY created_at DESC
-                    LIMIT %s
+                    LIMIT %s OFFSET %s
                     """,
                     params,
                 )
@@ -494,6 +505,45 @@ def list_recent_tasks(
     return rows
 
 
+def count_recent_tasks(
+    session_id: Optional[str] = None,
+    since_days: Optional[int] = None,
+) -> int:
+    """Return the task count for the same filters as ``list_recent_tasks`` (0 on any error)."""
+    url = get_database_url()
+    if not url:
+        return 0  # no database URL configured
+    try:
+        import psycopg2
+    except ImportError:
+        return 0  # driver missing: report zero instead of raising
+    try:
+        conn = psycopg2.connect(url)
+        try:
+            with conn.cursor() as cur:
+                conditions: List[str] = []
+                params: List[Any] = []
+                if session_id is not None:
+                    conditions.append("session_id = %s")
+                    params.append(session_id)
+                if since_days is not None:
+                    conditions.append(
+                        "created_at >= NOW() - (%s::integer * INTERVAL '1 day')"
+                    )
+                    params.append(int(since_days))
+                where = (" WHERE " + " AND ".join(conditions)) if conditions else ""  # fixed fragments only
+                cur.execute(
+                    f"SELECT COUNT(*) FROM tasks{where}",
+                    params,  # values are bound by the driver, never interpolated
+                )
+                row = cur.fetchone()
+                return int(row[0]) if row and row[0] is not None else 0
+        finally:
+            conn.close()  # always release the connection, even when the query fails
+    except Exception:
+        return 0  # connection/query failure is reported as an empty count
+
+
 def delete_task(task_id: str, session_id: Optional[str] = None) -> bool:
     """Delete a task row. For pending/running, cancels the RQ job first when ``rq_job_id`` is set."""
     url = get_database_url()
diff --git a/evaluation_dashboard_app/lib/deploy_debug.py b/evaluation_dashboard_app/lib/deploy_debug.py
index 0edeb76..d45f024 100644
--- a/evaluation_dashboard_app/lib/deploy_debug.py
+++ b/evaluation_dashboard_app/lib/deploy_debug.py
@@ -157,6 +157,109 @@ def task_counts_by_status() -> Tuple[bool, str, Optional[Dict[str, int]]]:
             return False, str(e), None
 
 
+def database_table_overview() -> Tuple[bool, str, Optional[List[Dict[str, Any]]]]:
+    """Return (ok, message, rows) listing public base tables with estimated rows and total bytes."""
+    if not get_database_url():
+        return False, "DATABASE_URL is not set", None
+    with get_connection() as conn:
+        if conn is None:
+            return False, "No database connection", None
+        try:
+            with conn.cursor() as cur:
+                cur.execute(
+                    """
+                    SELECT
+                        t.table_name,
+                        COALESCE(c.reltuples::bigint, 0) AS estimated_rows,
+                        CASE WHEN c.oid IS NULL THEN 0 ELSE pg_total_relation_size(c.oid) END AS total_bytes
+                    FROM information_schema.tables t
+                    LEFT JOIN pg_namespace n ON n.nspname = t.table_schema
+                    LEFT JOIN pg_class c ON c.relname = t.table_name AND c.relnamespace = n.oid
+                    WHERE t.table_schema = 'public'
+                      AND t.table_type = 'BASE TABLE'
+                    ORDER BY t.table_name
+                    """
+                )
+                rows = [
+                    {
+                        "table_name": str(r[0]),
+                        "estimated_rows": max(0, int(r[1] or 0)),  # reltuples is -1 when never analyzed
+                        "total_bytes": int(r[2] or 0),
+                    }
+                    for r in cur.fetchall()
+                ]
+            return True, "OK", rows
+        except Exception as e:
+            return False, str(e), None  # surface the DB error text to the debug page
+
+
+def database_recent_task_rows(
+    *,
+    limit: int = 50,
+    offset: int = 0,
+    status: Optional[str] = None,
+    task_type: Optional[str] = None,
+    search: Optional[str] = None,
+) -> Tuple[bool, str, List[Dict[str, Any]], int]:
+    """Return (ok, message, rows, total) for one filtered page of ``tasks``, newest first."""
+    if not get_database_url():
+        return False, "DATABASE_URL is not set", [], 0
+    with get_connection() as conn:
+        if conn is None:
+            return False, "No database connection", [], 0
+        try:
+            from psycopg2.extras import RealDictCursor
+        except ImportError:
+            return False, "psycopg2 not installed", [], 0
+
+        where_parts: List[str] = []  # fixed SQL fragments only
+        params: List[Any] = []  # bound values, in placeholder order
+        if status:
+            where_parts.append("status = %s")
+            params.append(status)
+        if task_type:
+            where_parts.append("type = %s")
+            params.append(task_type)
+        if search:
+            needle = f"%{search.strip()}%"  # substring match; LIKE wildcards in the input are not escaped
+            where_parts.append(
+                """
+                (
+                    id::text ILIKE %s OR type ILIKE %s OR status ILIKE %s OR
+                    COALESCE(session_id, '') ILIKE %s OR COALESCE(rq_job_id, '') ILIKE %s OR
+                    COALESCE(result_path, '') ILIKE %s OR COALESCE(error_message, '') ILIKE %s OR
+                    COALESCE(parameters::text, '') ILIKE %s OR COALESCE(result_summary, '') ILIKE %s
+                )
+                """
+            )
+            params.extend([needle] * 9)  # one needle per ILIKE placeholder above
+
+        where_sql = (" WHERE " + " AND ".join(where_parts)) if where_parts else ""
+        capped_limit = max(1, min(int(limit), 500))  # clamp page size to 1..500
+        safe_offset = max(0, int(offset))  # negative offsets become 0
+        try:
+            with conn.cursor(cursor_factory=RealDictCursor) as cur:
+                cur.execute(f"SELECT COUNT(*) FROM tasks{where_sql}", params)  # total before paging
+                total_row = cur.fetchone()
+                total = int(total_row["count"] if total_row else 0)  # dict row: COUNT(*) read via "count"
+                cur.execute(
+                    f"""
+                    SELECT
+                        id, type, status, session_id, rq_job_id,
+                        created_at, updated_at, progress_pct, progress_message,
+                        result_path, error_message, parameters, result_summary, log_output
+                    FROM tasks{where_sql}
+                    ORDER BY created_at DESC
+                    LIMIT %s OFFSET %s
+                    """,
+                    [*params, capped_limit, safe_offset],  # filter values first, then paging values
+                )
+                rows = [dict(row) for row in cur.fetchall()]  # plain dicts for the caller
+            return True, "OK", rows, total
+        except Exception as e:
+            return False, str(e), [], 0  # surface the DB error text to the debug tab
+
+
 def docker_unix_socket_for_check() -> Optional[str]:
     """Path to Unix socket for existence check, or None if DOCKER_HOST is non-Unix (e.g. tcp)."""
     host = os.environ.get("DOCKER_HOST", "").strip()
@@ -223,12 +326,25 @@ def list_containers_for_debug(client) -> Tuple[List[Dict[str, str]], Optional[st
         rows: List[Dict[str, str]] = []
         for c in containers:
             cid = c.id or ""
+            attrs = getattr(c, "attrs", None) or {}
+            state = attrs.get("State") or {}
+            state_status = (state.get("Status") or getattr(c, "status", "") or "").strip()
+            health_obj = state.get("Health") or {}
+            health_s = (health_obj.get("Status") or "").strip()
+            labels = (attrs.get("Config") or {}).get("Labels") or {}
+            if not isinstance(labels, dict):
+                labels = {}
+            compose_service = (labels.get("com.docker.compose.service") or "").strip()
+            compose_project = (labels.get("com.docker.compose.project") or "").strip()
             rows.append(
                 {
                     "id": cid[:12] if len(cid) >= 12 else cid,
                     "full_id": cid,
                     "name": (c.name or "").lstrip("/"),
-                    "status": getattr(c, "status", "") or "",
+                    "state": state_status or "unknown",
+                    "health": health_s if health_s else "—",
+                    "compose_service": compose_service or "—",
+                    "compose_project": compose_project or "—",
                     "image": c.image.tags[0] if c.image and c.image.tags else (c.image.id[:12] if c.image else ""),
                 }
             )
diff --git a/evaluation_dashboard_app/lib/detection_stats_debug.py b/evaluation_dashboard_app/lib/detection_stats_debug.py
new file mode 100644
index 0000000..23c1df4
--- /dev/null
+++ b/evaluation_dashboard_app/lib/detection_stats_debug.py
@@ -0,0 +1,160 @@
+"""
+Optional verbose logging for pages/3_Detection_Stats.py (502 / freeze / OOM debugging).
+
+Enable with environment variable:
+  EVAL_DETECTION_STATS_DEBUG=1
+
+Logs go to stderr (visible in `docker compose logs streamlit1`).
+"""
+
+from __future__ import annotations
+
+import logging
+import os
+import resource
+import sys
+import time
+import traceback
+from contextlib import contextmanager
+from typing import Any, List, Tuple
+
+_LOG = logging.getLogger("eval_dashboard.detection_stats")
+_CONFIGURED = False
+
+
+def detection_stats_debug_enabled() -> bool:
+    """Return True when EVAL_DETECTION_STATS_DEBUG is 1/true/yes/on (case-insensitive)."""
+    return os.environ.get("EVAL_DETECTION_STATS_DEBUG", "").strip().lower() in {"1", "true", "yes", "on"}
+
+
+def _ensure_logging() -> None:
+    """Attach a stderr handler to the module logger once, and only when debug is enabled."""
+    global _CONFIGURED
+    if _CONFIGURED or not detection_stats_debug_enabled():
+        return
+    line_format = "%(asctime)s [%(levelname)s] detection_stats: %(message)s"
+    stderr_handler = logging.StreamHandler(sys.stderr)
+    stderr_handler.setFormatter(logging.Formatter(line_format))
+    _LOG.setLevel(logging.DEBUG)
+    _LOG.addHandler(stderr_handler)
+    # Records stop here instead of also reaching handlers of ancestor loggers.
+    _LOG.propagate = False
+    # Mark as done so later calls return early and never add a second handler.
+    _CONFIGURED = True
+
+
+def ds_dlog(fmt: str, *args: Any) -> None:
+    """Log one %-formatted line when debug is enabled; a bad format logs fmt and args raw."""
+    if not detection_stats_debug_enabled():
+        return
+    _ensure_logging()
+    try:  # format eagerly: handlers swallow format errors, so a lazy call never reaches except
+        _LOG.info("%s", fmt % args if args else fmt)
+    except Exception:
+        _LOG.info("%s %s", fmt, args)
+
+
+def ds_debug_init_session_state(session_state: Any) -> None:
+    """Reset the timing buffer and run start time, then log pid, argv[0] and selected env vars.
+
+    Call once per script run (after set_page_config). Does nothing when debug is disabled.
+    """
+    if not detection_stats_debug_enabled():
+        return
+    session_state["_ds_debug_timings"] = []
+    session_state["_ds_debug_run_started"] = time.perf_counter()
+    ds_dlog("=== Detection Stats script run started ===")
+    ds_dlog("pid=%s argv[0]=%s", os.getpid(), sys.argv[0] if sys.argv else "")
+    secret_key = "STREAMLIT_SERVER_COOKIE_SECRET"
+    for env_name in ("EVAL_DETECTION_STATS_DEBUG", secret_key, "EVAL_DASHBOARD_DATA_ROOT"):
+        env_value = os.environ.get(env_name)
+        if env_value and env_name == secret_key:
+            ds_dlog("env %s=(set len=%s)", env_name, len(env_value))  # length only, never the value
+        else:
+            ds_dlog("env %s=%r", env_name, env_value)
+
+
+def ds_debug_log_memory(note: str = "") -> None:
+    """Log this process's ru_maxrss and user/system CPU time, tagged with *note*.
+
+    Does nothing unless debug is enabled; a failure to read usage is logged, not raised.
+    """
+    if not detection_stats_debug_enabled():
+        return
+    mem_fmt = "MEM %s ru_maxrss=%s ru_utime=%.3fs ru_stime=%.3fs"
+    try:
+        usage = resource.getrusage(resource.RUSAGE_SELF)
+        # ru_maxrss is logged raw; its unit differs by OS (kilobytes on Linux, bytes on macOS).
+        ds_dlog(mem_fmt, note, usage.ru_maxrss, usage.ru_utime, usage.ru_stime)
+    except Exception as err:
+        # Best-effort diagnostics: report the problem in the log instead of raising.
+        ds_dlog("MEM %s (unavailable: %s)", note, err)
+
+
+def _append_timing(session_state: Any, name: str, seconds: float) -> None:
+    """Append ``(name, seconds)`` to the ``_ds_debug_timings`` list kept in session_state."""
+    if detection_stats_debug_enabled():
+        timings = session_state.get("_ds_debug_timings")
+        if not isinstance(timings, list):
+            # Missing or non-list buffer: start a fresh list and store it.
+            timings = session_state["_ds_debug_timings"] = []
+        timings.append((name, seconds))
+
+
+@contextmanager
+def ds_dtimer(name: str, session_state: Any):
+    """Time the wrapped block, logging and recording it in session_state (plain yield if debug off)."""
+    if detection_stats_debug_enabled():
+        started = time.perf_counter()
+        ds_dlog("TIMER start %s", name)
+        try:
+            yield
+        finally:
+            elapsed = time.perf_counter() - started
+            ds_dlog("TIMER end %s (%.3fs)", name, elapsed)
+            _append_timing(session_state, name, elapsed)
+    else:
+        yield
+
+
+def ds_debug_log_exception(where: str, exc: BaseException) -> None:
+    """Log *exc* with its own traceback (also correct when called outside an ``except`` block)."""
+    if detection_stats_debug_enabled():
+        _ensure_logging()
+        _LOG.error("EXCEPTION in %s: %s", where, exc, exc_info=exc)
+
+
+def ds_debug_render_expander(session_state: Any) -> None:
+    """Render a collapsed Streamlit expander with run timings and a small env subset.
+
+    Draws nothing when debug is disabled.
+    """
+    import streamlit as st
+
+    if not detection_stats_debug_enabled():
+        return
+    # Wall time is measured from the value stored by ds_debug_init_session_state.
+    run_started = session_state.get("_ds_debug_run_started")
+    if isinstance(run_started, (int, float)):
+        elapsed_total = time.perf_counter() - float(run_started)
+        header = f"Total wall time (approx): {elapsed_total:.3f}s"
+    else:
+        # No numeric start time stored for this run.
+        header = "Total wall time: n/a"
+
+    timings: List[Tuple[str, float]] = session_state.get("_ds_debug_timings") or []
+    report = [header, "", "Section timings (seconds):"]
+    if timings:
+        report += [f"  - {label}: {secs:.3f}s" for label, secs in timings]
+    else:
+        report.append("  (no ds_dtimer sections recorded)")
+
+    report.append("")
+    report.append("Environment (subset):")
+    for env_name in ("EVAL_DETECTION_STATS_DEBUG", "EVAL_DASHBOARD_DATA_ROOT"):
+        report.append(f"  {env_name}={os.environ.get(env_name, '')!r}")
+
+    # Everything is shown as one plain-text block inside the expander.
+    with st.expander("Detection Stats debug (EVAL_DETECTION_STATS_DEBUG=1)", expanded=False):
+        st.code("\n".join(report), language="text")
+        st.caption("Check `docker compose logs streamlit1` for the same lines on stderr.")
diff --git a/evaluation_dashboard_app/lib/docker_live_structure.py b/evaluation_dashboard_app/lib/docker_live_structure.py
new file mode 100644
index 0000000..9274c87
--- /dev/null
+++ b/evaluation_dashboard_app/lib/docker_live_structure.py
@@ -0,0 +1,243 @@
+"""
+Mermaid source for the Deployment debug Docker tab: same subgraph layout as Readme.md (Help).
+
+Clients → Edge → App Tier → T4 dataset server (optional) → Infrastructure → Workers → Host data,
+with live container labels. T4 may be a Compose service (e.g. ``t4_server``) or an external HTTP
+endpoint from ``T4_VISUALIZER_BASE_URL`` (synthetic node).
+"""
+
+from __future__ import annotations
+
+import os
+from collections import defaultdict
+from typing import Dict, List, Optional
+from urllib.parse import urlparse
+
+
+def _by_compose_service(rows: List[Dict[str, str]]) -> Dict[str, List[int]]:
+    """Group row indices by Compose service name, leaving out blank and "—" entries."""
+    grouped: Dict[str, List[int]] = defaultdict(list)
+    for idx, service in enumerate((row.get("compose_service") or "").strip() for row in rows):
+        if service not in ("", "—"):
+            grouped[service].append(idx)
+    return grouped
+
+
+def _mermaid_plain(s: str, max_len: int) -> str:
+    return (s or "")[:max_len].translate(str.maketrans({'"': "'", "\n": " ", "#": " "}))  # label-safe text
+
+
+def _row_mermaid_label(r: Dict[str, str]) -> str:
+    """Build the node label: name, then "state · service", then health when one is reported."""
+    service = _mermaid_plain(r.get("compose_service"), 18) or "—"
+    parts = [_mermaid_plain(r.get("name"), 38), f"{_mermaid_plain(r.get('state'), 14)} · {service}"]
+    health = (r.get("health") or "").strip()
+    if health not in ("", "—"):
+        parts.append(_mermaid_plain(health, 14))
+    return "<br/>".join(parts)
+
+
+def _row_class(r: Dict[str, str]) -> str:
+    """Pick the Mermaid class name for a container row from its state.
+
+    "running" -> "run", "exited"/"dead" -> "x", anything else -> "o" (case-insensitive).
+    """
+    state = (r.get("state") or "").lower()
+    return {"running": "run", "exited": "x", "dead": "x"}.get(state, "o")
+
+
+def _nid(i: int) -> str:
+    return "N" + str(i)  # Mermaid node id for the row at index i
+
+
+def _nid_list(idxs: List[int]) -> Optional[str]:
+    """Join the node ids of *idxs* with " & "; return None when there are none."""
+    joined = " & ".join(map(_nid, idxs or ()))
+    return joined or None
+
+
+def _is_t4_compose_service(svc: str) -> bool:
+    """Return True for Compose service names that denote the T4 dataset server.
+
+    Matches "t4" itself or any name starting with "t4_" (e.g. "t4_server", "t4_visualizer"),
+    ignoring case and surrounding whitespace; empty names and the "—" placeholder never match.
+    """
+    normalized = (svc or "").strip().lower()
+    return normalized == "t4" or normalized.startswith("t4_")
+
+
+def rowset_has_t4_compose_service(rows: List[Dict[str, str]]) -> bool:
+    """Report whether at least one row's ``compose_service`` names the T4 dataset server."""
+    return any(map(_is_t4_compose_service, (str(row.get("compose_service") or "") for row in rows)))
+
+
+def _t4_url_display(url: str, *, max_len: int = 52) -> str:
+    """Short label for Mermaid: ``host:port`` without credentials, else the raw value.
+
+    Any ``user:password@`` userinfo in the URL authority is dropped so that secrets embedded
+    in the base URL are never rendered; results longer than *max_len* end with "…".
+    """
+    u = (url or "").strip()
+    if not u:
+        return "(not set)"
+    try:
+        netloc = urlparse(u).netloc
+    except ValueError:
+        netloc = ""
+    out = (netloc.rpartition("@")[2] if netloc else u).replace('"', "'")
+    return out if len(out) <= max_len else out[: max_len - 1] + "…"
+
+
+T4_SYNTHETIC_NODE = "T4SYN"
+
+
+def live_containers_mermaid(
+    rows: List[Dict[str, str]],
+    *,
+    t4_visualizer_base_url: Optional[str] = None,
+) -> str:
+    """
+    Return Mermaid ``flowchart LR`` source with one node per row in *rows*, grouped into the
+    Edge (nginx), App Tier (streamlit*), T4, Infrastructure (redis/postgres/init_db), Workers
+    and Other subgraphs, plus fixed Browser and Data-root nodes. A synthetic T4 node is drawn
+    when a base URL is given (or ``T4_VISUALIZER_BASE_URL`` is set) and no T4 row is listed.
+    """
+    if t4_visualizer_base_url is None:
+        t4_visualizer_base_url = os.environ.get("T4_VISUALIZER_BASE_URL", "").strip() or None
+
+    if not rows:
+        return 'flowchart LR\n    _empty["No containers in filter"]'
+
+    by = _by_compose_service(rows)  # service name -> indices into rows
+    nginx = sorted(by.get("nginx", []), key=lambda i: rows[i].get("name", ""))
+    st: List[int] = []  # every service whose name starts with "streamlit"
+    for svc in sorted(s for s in by if s.startswith("streamlit")):
+        st.extend(sorted(by[svc], key=lambda i: rows[i].get("name", "")))
+    redis = sorted(by.get("redis", []), key=lambda i: rows[i].get("name", ""))
+    pg = sorted(by.get("postgres", []), key=lambda i: rows[i].get("name", ""))
+    init = sorted(by.get("init_db", []), key=lambda i: rows[i].get("name", ""))
+    workers = sorted(by.get("worker", []), key=lambda i: rows[i].get("name", ""))
+    t4: List[int] = []
+    for svc, idxs in by.items():
+        if _is_t4_compose_service(svc):
+            t4.extend(sorted(idxs, key=lambda i: rows[i].get("name", "")))
+    t4 = sorted(set(t4), key=lambda i: rows[i].get("name", ""))
+    use_synthetic_t4 = bool(t4_visualizer_base_url) and not t4  # URL known but no T4 container row
+    known = set(nginx + st + redis + pg + init + workers + t4)
+    other = [i for i in range(len(rows)) if i not in known]  # rows not claimed by any tier above
+
+    def node_line(i: int) -> str:
+        r = rows[i]
+        return f'        {_nid(i)}["{_row_mermaid_label(r)}"]:::{_row_class(r)}'
+
+    lines: List[str] = [
+        "flowchart LR",
+        "    classDef run fill:#c8e6c9,stroke:#2e7d32",
+        "    classDef x fill:#ffcdd2,stroke:#c62828",
+        "    classDef o fill:#e0e0e0,stroke:#616161",
+        "    classDef syn fill:#e3f2fd,stroke:#1565c0",
+        '    subgraph clients ["Clients"]',
+        "        BR[Browser]:::syn",
+        "    end",
+    ]
+
+    if nginx:
+        lines.append('    subgraph edge ["Edge"]')
+        for i in nginx:
+            lines.append(node_line(i))
+        lines.append("    end")
+
+    if st:
+        lines.append('    subgraph app ["App Tier"]')
+        for i in st:
+            lines.append(node_line(i))
+        lines.append("    end")
+
+    if t4 or use_synthetic_t4:
+        lines.append('    subgraph t4tier ["T4 dataset server"]')
+        if t4:
+            for i in t4:
+                lines.append(node_line(i))
+        else:
+            t4_lab = _mermaid_plain(
+                f"T4 visualizer (HTTP)<br/>{_t4_url_display(t4_visualizer_base_url or '')}",
+                120,
+            )
+            lines.append(f'        {T4_SYNTHETIC_NODE}["{t4_lab}"]:::syn')
+        lines.append("    end")
+
+    infra = redis + pg + init
+    if infra:
+        lines.append('    subgraph infra ["Infrastructure"]')
+        for i in infra:
+            lines.append(node_line(i))
+        lines.append("    end")
+
+    if workers:
+        lines.append('    subgraph workers ["Workers"]')
+        for i in workers:
+            lines.append(node_line(i))
+        lines.append("    end")
+
+    lines.append('    subgraph volumes ["Host data"]')  # always drawn, even with no workers
+    lines.append('        DR[Data root<br/>bind-mounted data]:::syn')
+    lines.append("    end")
+
+    if other:
+        lines.append('    subgraph misc ["Other"]')
+        for i in other:
+            lines.append(node_line(i))
+        lines.append("    end")
+
+    lines.append("")
+    lines.append("    %% Same topology as Readme.md Help")
+
+    nl_nginx = _nid_list(nginx)  # "N1 & N2" style edge targets, or None when the tier is empty
+    nl_st = _nid_list(st)
+    nl_redis = _nid_list(redis)
+    nl_pg = _nid_list(pg)
+    nl_workers = _nid_list(workers)
+    nl_t4: Optional[str]
+    if t4:
+        nl_t4 = _nid_list(t4)
+    elif use_synthetic_t4:
+        nl_t4 = T4_SYNTHETIC_NODE
+    else:
+        nl_t4 = None
+
+    if nl_nginx:
+        lines.append(f"    BR --> {nl_nginx}")
+        if nl_st:
+            for i in nginx:
+                lines.append(f"    {_nid(i)} --> {nl_st}")
+    elif nl_st:
+        lines.append(f"    BR --> {nl_st}")  # no nginx row: browser points straight at the app tier
+
+    for i in st:
+        if nl_redis:
+            lines.append(f"    {_nid(i)} --> {nl_redis}")
+        if nl_pg:
+            lines.append(f"    {_nid(i)} --> {nl_pg}")
+        if nl_t4:
+            lines.append(f"    {_nid(i)} --> {nl_t4}")
+
+    for i in redis:
+        if nl_workers:
+            lines.append(f"    {_nid(i)} --> {nl_workers}")
+
+    for i in workers:
+        if nl_pg:
+            lines.append(f"    {_nid(i)} --> {nl_pg}")
+        lines.append(f"    {_nid(i)} --> DR")
+
+    if nl_t4:
+        if t4:
+            for i in t4:
+                lines.append(f"    {_nid(i)} --> DR")
+        else:
+            lines.append(f"    {T4_SYNTHETIC_NODE} --> DR")
+
+    for i in init:
+        for j in pg:
+            lines.append(f"    {_nid(i)} -.-> {_nid(j)}")  # dotted edge: init_db -> postgres
+
+    return "\n".join(lines)
diff --git a/evaluation_dashboard_app/lib/download_core.py b/evaluation_dashboard_app/lib/download_core.py
index 8b2dac9..4c59985 100644
--- a/evaluation_dashboard_app/lib/download_core.py
+++ b/evaluation_dashboard_app/lib/download_core.py
@@ -9,6 +9,7 @@
 import os
 import shutil
 import urllib.parse
+from concurrent.futures import ThreadPoolExecutor, as_completed
 from pathlib import Path
 from collections import Counter
 from typing import Any, Callable, Dict, List, Optional
@@ -24,6 +25,19 @@
 API_BASE_URL = "https://evaluation.ci.web.auto/v3"
 
 
+def _compact_eval_path(path: Any, *, parts: int = 2) -> str:
+    """Return the last *parts* components of *path* for progress/log messages.
+
+    Empty input yields "unknown"; a path with at most *parts* components is returned whole
+    (joining its root component with "/" would otherwise produce "//name").
+    """
+    text = str(path or "").strip()
+    if not text:
+        return "unknown"
+    components = Path(text).parts
+    return "/".join(components[-parts:]) if 0 < parts < len(components) else text
+
+
 def _make_evaluator_session(environment: str = DEFAULT_ENVIRONMENT):
     """Build authenticated session for evaluation.ci.web.auto API (no Streamlit)."""
     os.environ["AUTH_PROFILE"] = environment
@@ -144,6 +158,8 @@ def get_case_simulation_log_info(
                 continue
             if "simulation_archive" not in report.get("logs", {}):
                 continue
+            if "simulation_result_json" not in report.get("logs", {}):
+                continue
             scenario_params = report.get("scenario_parameters") or {}
             result.append({
                 "suite_id": sid,
@@ -528,3 +544,196 @@ def run_download_scenarios(
         organize_files_into_directories(out_dir)
     total_attempted = len(log_dicts)
     return (failure_count, total_attempted, rows)
+
+
+def run_download_and_eval(
+    project_id: str,
+    job_id: str,
+    suite_id: Optional[str],
+    output_path: str,
+    download_type: str = "archives",
+    phase: str = "perception.object_recognition.tracking.objects",
+    *,
+    skip_large_file: bool = False,
+    large_file_mb: float = 50.0,
+    keep_zip_files: bool = False,
+    suite_ids: Optional[List[str]] = None,
+    run_eval: bool = True,
+    generate_parquet: bool = True,
+    eval_recursive: bool = True,
+    eval_overwrite: bool = False,
+    eval_workers: int = 4,
+    on_progress: Optional[Callable[[str], None]] = None,
+    on_warning: Optional[Callable[[str], None]] = None,
+) -> Dict[str, Any]:
+    """
+    Combined workflow: Download results, then optionally run eval and generate parquet.
+
+    Returns dict with:
+        - download_success: bool (True when at least one scenario downloaded)
+        - download_summary: dict with total/success/failed counts and rows
+        - eval_summary: dict with directories_processed, etc. (if run_eval=True)
+        - parquet_path: str (if generate_parquet=True)
+        - errors: list of human-readable messages; empty when every phase succeeded
+    """
+    from lib import eval_summary
+
+    # Parquet generation is optional: a missing module only disables step 3.
+    try:
+        from lib.perception_catalog_io import pkl_archive_to_parquet
+    except ImportError:
+        pkl_archive_to_parquet = None
+
+    result: Dict[str, Any] = {
+        "download_success": False,
+        "download_summary": {},
+        "eval_summary": {},
+        "parquet_path": "",
+        "errors": [],
+    }
+
+    # Step 1: Download
+    if on_progress:
+        on_progress("Starting download phase...")
+
+    try:
+        failure_count, total_attempted, rows = run_download_results(
+            project_id=project_id,
+            job_id=job_id,
+            suite_id=suite_id,
+            output_path=output_path,
+            download_type=download_type,
+            phase=phase,
+            skip_large_file=skip_large_file,
+            large_file_mb=large_file_mb,
+            keep_zip_files=keep_zip_files,
+            suite_ids=suite_ids,
+            on_progress=on_progress,
+            on_warning=on_warning,
+        )
+        success_count = total_attempted - failure_count
+        result["download_summary"] = {
+            "total": total_attempted,
+            "success": success_count,
+            "failed": failure_count,
+            "rows": rows,
+        }
+
+        # Check if download was successful (at least some files downloaded)
+        result["download_success"] = success_count > 0
+        if failure_count > 0 and success_count == 0:
+            result["errors"].append(f"Download failed: {failure_count} of {total_attempted} scenarios failed")
+            return result
+        if success_count == 0:
+            result["errors"].append("Download: No scenarios were successfully downloaded")
+            return result
+
+    except Exception as e:
+        result["errors"].append(f"Download exception: {e}")
+        return result
+
+    # Step 2: Run eval (if requested and download succeeded)
+    if run_eval and result["download_success"]:
+        if on_progress:
+            on_progress("Download complete. Starting eval phase...")
+
+        try:
+            eval_root = output_path
+            target_dirs = eval_summary.find_eval_result_dirs(eval_root, recursive=eval_recursive)
+            if target_dirs:
+                total = len(target_dirs)
+                eval_statuses: List[Dict[str, Any]] = []
+                try:
+                    requested_workers = int(eval_workers or 1)
+                except (TypeError, ValueError):
+                    requested_workers = 1
+                workers = max(1, min(requested_workers, total))
+                if on_progress:
+                    on_progress(f"Eval: completed 0/{total} dirs")
+
+                def _eval_one(result_dir: str) -> Dict[str, Any]:
+                    # One bad directory must not abort the batch: report it as "failed".
+                    try:
+                        return eval_summary.run_eval_result_for_dir(result_dir, overwrite=eval_overwrite)
+                    except Exception as exc:
+                        return {"path": result_dir, "status": "failed", "detail": str(exc)}
+
+                def _record_status(status: Dict[str, Any], done: int, fallback_path: str) -> None:
+                    # Only ever called from the calling thread (both loops below run there).
+                    eval_statuses.append(status)
+                    state = str(status.get("status") or "failed")
+                    short_path = _compact_eval_path(status.get("path") or fallback_path)
+                    if on_progress:
+                        on_progress(f"Eval: completed {done}/{total} dirs - {state}: {short_path}")
+                    if state == "failed" and on_warning:
+                        on_warning(f"Eval failed for {status.get('path', '')}: {status.get('detail', '')}")
+
+                if workers == 1:
+                    # Single worker: evaluate in the calling thread, in directory order.
+                    for done, result_dir in enumerate(target_dirs, start=1):
+                        _record_status(_eval_one(result_dir), done, result_dir)
+                else:
+                    if on_progress:
+                        on_progress(f"Eval: running {total} dirs with {workers} worker(s)")
+                    with ThreadPoolExecutor(max_workers=workers) as executor:
+                        future_map = {executor.submit(_eval_one, d): d for d in target_dirs}
+                        # Statuses are recorded in completion order, not directory order.
+                        for done, future in enumerate(as_completed(future_map), start=1):
+                            _record_status(future.result(), done, future_map[future])
+
+                # Generate summary CSVs
+                csv_info = eval_summary.generate_summary_and_score_csv(eval_root)
+                failed = [s for s in eval_statuses if s.get("status") == "failed"]
+                skipped = [s for s in eval_statuses if s.get("status") == "skipped"]
+                succeeded = [s for s in eval_statuses if s.get("status") == "success"]
+                result["eval_summary"] = {
+                    "directories_processed": total,
+                    "success": len(succeeded),
+                    "failed": len(failed),
+                    "skipped": len(skipped),
+                    "summary_path": csv_info.get("summary_path", eval_root),
+                    "summary_rows": csv_info.get("summary_rows", 0),
+                    "score_rows": csv_info.get("score_rows", 0),
+                }
+                if failed:
+                    first = failed[0]
+                    result["errors"].append(
+                        f"Eval failed for {len(failed)} of {total} directories; "
+                        f"first: {first.get('path', '')} ({first.get('detail', '')})"
+                    )
+            else:
+                if on_warning:
+                    on_warning("No eval result directories found")
+        except Exception as e:
+            result["errors"].append(f"Eval exception: {e}")
+
+    # Step 3: Generate parquet (if requested and download succeeded)
+    if generate_parquet and result["download_success"] and pkl_archive_to_parquet is None:
+        # Surface the skip instead of silently returning an empty parquet_path.
+        if on_warning:
+            on_warning("Parquet generation skipped: lib.perception_catalog_io could not be imported")
+    elif generate_parquet and result["download_success"]:
+        if on_progress:
+            on_progress("Generating parquet...")
+
+        def _on_parquet_progress(done: int, total: int) -> None:
+            if on_progress:
+                on_progress(f"Parquet: Processing {done}/{total} pkl files")
+
+        def _on_parquet_skip(path: str, reason: str) -> None:
+            if on_warning:
+                on_warning(f"Parquet skipped {path}: {reason}")
+
+        try:
+            parquet_path = pkl_archive_to_parquet(
+                output_path,
+                on_progress=_on_parquet_progress,
+                on_skip=_on_parquet_skip,
+                project_id=project_id,
+                job_id=job_id,
+            )
+            result["parquet_path"] = parquet_path
+        except Exception as e:
+            result["errors"].append(f"Parquet exception: {e}")
+
+    return result
diff --git a/evaluation_dashboard_app/lib/eval_summary.py b/evaluation_dashboard_app/lib/eval_summary.py
index 4080003..f198d14 100644
--- a/evaluation_dashboard_app/lib/eval_summary.py
+++ b/evaluation_dashboard_app/lib/eval_summary.py
@@ -5,12 +5,38 @@
 import glob
 import json
 import os
+import signal
+import subprocess
+import sys
+import tempfile
 from pathlib import Path
 from typing import Any, Dict, List
 
 from lib.perception_eval_result_summarizer import run_eval_result, generate_score_json
 
 
+def _write_text_atomic(path: str, content: str) -> None:
+    """Write *content* to a temp file next to *path*, then ``os.replace`` it over *path*.
+
+    A read-only existing target therefore does not block the write if its directory is writable.
+    """
+    destination = Path(path)
+    staged = ""
+    try:
+        with tempfile.NamedTemporaryFile(
+            "w", encoding="utf-8", dir=os.fspath(destination.parent), delete=False
+        ) as tmp:
+            staged = tmp.name
+            tmp.write(content)
+        os.replace(staged, destination)
+    finally:
+        if staged and os.path.exists(staged):
+            try:
+                os.unlink(staged)
+            except OSError:
+                pass
+
+
 def find_eval_result_dirs(root_dir: str, recursive: bool = True) -> List[str]:
     """Return sorted list of directories under root_dir that contain scenario.yaml and scene_result.pkl."""
     if not os.path.isdir(root_dir):
@@ -28,8 +54,8 @@ def find_eval_result_dirs(root_dir: str, recursive: bool = True) -> List[str]:
     return sorted(result_dirs)
 
 
-def run_eval_result_for_dir(result_dir: str, overwrite: bool = False) -> Dict[str, Any]:
-    """Run eval_result and generate score.json for one directory. Returns status dict."""
+def _run_eval_result_for_dir_inline(result_dir: str, overwrite: bool = False) -> Dict[str, Any]:
+    """Run eval_result in the current process and generate score.json for one directory."""
     result_file = os.path.join(result_dir, "result.txt")
     score_file = os.path.join(result_dir, "score.json")
     if os.path.exists(result_file) and not overwrite:
@@ -59,6 +85,89 @@ def run_eval_result_for_dir(result_dir: str, overwrite: bool = False) -> Dict[st
         return {"path": result_dir, "status": "failed", "detail": str(e)}
 
 
+def _signal_detail(returncode: int) -> str:
+    """Describe a subprocess return code as "SIGNAME (N)" or "exit code N".
+
+    -N and 128+N both mean signal N; an unknown 128+N is reported as a plain exit code.
+    """
+    sig_num = -returncode if returncode < 0 else returncode - 128
+    if sig_num > 0:
+        try:
+            return f"{signal.Signals(sig_num).name} ({sig_num})"
+        except ValueError:
+            if returncode < 0:
+                return f"signal {sig_num} ({sig_num})"
+    return f"exit code {returncode}"
+
+
+def _write_eval_subprocess_failure(
+    result_dir: str,
+    message: str,
+    stdout: str = "",
+    stderr: str = "",
+) -> None:
+    """Persist native-crash details where the UI and user can inspect them.
+
+    Writes "Error: <message>" to ``result.txt`` and the same line plus any captured
+    stdout/stderr to ``eval_subprocess.log``, both inside *result_dir*. Files are replaced via
+    ``_write_text_atomic`` so a read-only ``result.txt`` from an earlier run cannot block it.
+    """
+    detail = f"Error: {message}\n"
+    log_parts = [detail]
+    if stdout:
+        log_parts += ["\n--- stdout ---\n", stdout]
+    if stderr:
+        log_parts += ["\n--- stderr ---\n", stderr]
+    _write_text_atomic(os.path.join(result_dir, "result.txt"), detail)
+    _write_text_atomic(os.path.join(result_dir, "eval_subprocess.log"), "".join(log_parts))
+
+
+def _run_eval_result_for_dir_subprocess(result_dir: str, overwrite: bool = False) -> Dict[str, Any]:
+    """Evaluate one result directory in a child Python process and return its status dict.
+
+    The child is ``python -m lib.eval_summary __run_eval_dir <dir> <0|1>``, started with cwd
+    ``Path(__file__).resolve().parents[1]``, so native crashes are contained in the child.
+    Exit code 0: the status is parsed from the last stdout line starting with
+    ``__EVAL_RESULT_JSON__`` (a generic success dict when that line is missing or invalid).
+    Any other exit code: the failure is persisted in *result_dir* and "failed" is returned.
+    """
+    marker = "__EVAL_RESULT_JSON__"
+    app_root = Path(__file__).resolve().parents[1]
+    child_env = os.environ.copy()
+    # Respect an explicit PYTHONFAULTHANDLER setting; otherwise turn it on for the child.
+    child_env.setdefault("PYTHONFAULTHANDLER", "1")
+    overwrite_flag = "1" if overwrite else "0"
+    argv = [sys.executable, "-m", "lib.eval_summary", "__run_eval_dir", result_dir, overwrite_flag]
+    proc = subprocess.run(
+        argv,
+        cwd=os.fspath(app_root),
+        env=child_env,
+        text=True,
+        capture_output=True,
+    )
+    if proc.returncode != 0:
+        detail = f"eval subprocess failed with {_signal_detail(proc.returncode)}"
+        _write_eval_subprocess_failure(result_dir, detail, stdout=proc.stdout, stderr=proc.stderr)
+        return {"path": result_dir, "status": "failed", "detail": detail}
+
+    # Scan from the end: only the most recent marker line is considered.
+    tagged = next((ln for ln in reversed(proc.stdout.splitlines()) if ln.startswith(marker)), None)
+    if tagged is not None:
+        try:
+            return json.loads(tagged.removeprefix(marker))
+        except json.JSONDecodeError:
+            pass  # fall through to the generic success status
+    return {"path": result_dir, "status": "success", "detail": "completed"}
+
+
+def run_eval_result_for_dir(result_dir: str, overwrite: bool = False) -> Dict[str, Any]:
+    """Run eval for one directory (isolated child process by default) and return its status dict."""
+    isolated = os.environ.get("EVAL_RUN_ISOLATED_SUBPROCESS", "1").strip().lower()
+    if isolated in ("0", "false", "no", "off"):  # explicit opt-out: run in this process
+        return _run_eval_result_for_dir_inline(result_dir, overwrite=overwrite)
+    return _run_eval_result_for_dir_subprocess(result_dir, overwrite=overwrite)
+
+
 def generate_summary_and_score_csv(input_path: str) -> Dict[str, Any]:
     """
     Generate Summary.csv and Score.csv in input_path from each subdirectory's result.txt and score.json.
@@ -73,6 +182,43 @@ def _infer_suite_name(dir_name: str) -> str:
                 return parts[0]
         return base
 
+    def _dataset_id_from_case_dir(case_dir: str) -> str:
+        """Resolve the real T4 dataset id for Score.csv; blank if unavailable or unreadable."""
+        case_path = Path(case_dir)
+        metadata_path = case_path / "t4_metadata.json"
+        if metadata_path.exists():
+            try:
+                with open(metadata_path, "r", encoding="utf-8") as f:
+                    meta = json.load(f)
+                dataset_id = str(meta.get("t4_dataset_id") or "").strip()
+                if dataset_id:
+                    return dataset_id
+            except (OSError, ValueError, TypeError, AttributeError):  # ValueError: bad JSON or bad UTF-8
+                pass
+
+        scenario_path = case_path / "scenario.yaml"
+        if scenario_path.exists():
+            try:
+                import yaml
+
+                with open(scenario_path, "r", encoding="utf-8") as f:
+                    scenario = yaml.safe_load(f) or {}
+                datasets = scenario.get("Evaluation", {}).get("Datasets", [])
+                if isinstance(datasets, list):
+                    for item in datasets:
+                        if isinstance(item, dict) and item:
+                            dataset_id = str(next(iter(item.keys())) or "").strip()
+                            if dataset_id:
+                                return dataset_id
+                elif isinstance(datasets, dict):
+                    dataset_id = str(next(iter(datasets.keys()), "") or "").strip()
+                    if dataset_id:
+                        return dataset_id
+            except Exception:  # best-effort lookup: malformed YAML must not abort Score.csv
+                pass
+
+        return ""
+
     result_folders = glob.glob(os.path.join(input_path, "*/"))
     result_folders.sort()
     result_entries: List[Dict[str, str]] = []
@@ -94,6 +240,14 @@ def _infer_suite_name(dir_name: str) -> str:
     summary_lines: List[str] = []
     score_lines: List[str] = []
 
+    score_header = "Scenario, Dataset, Option, GT_OBJ,"
+    for _ in range(4):
+        score_header += (
+            "Distance, NM, TP/TN, ADD, AIL, UIL, PFN/PFP, UUID Num, "
+            "Practical Pass Rate, MAX_DIST_THRESH,OBJ_CNTS,"
+        )
+    score_header += "\n"
+
     for entry in result_entries:
         folder = entry["path"]
         suite_name = entry["suite"]
@@ -145,7 +299,11 @@ def _infer_suite_name(dir_name: str) -> str:
         with open(score_json_path, "r", encoding="utf-8") as f:
             dic = json.load(f)
 
-        line = f"{Path(folder).name},"
+        folder_name = Path(folder).name
+        dataset_id = _dataset_id_from_case_dir(folder)
+
+        line = f"{folder_name},"
+        line += f"{dataset_id},"
         line += f"{dic.get('Option', '')},"
         line += f"{dic.get('criteria0', {}).get('GT_OBJ', '')},"
 
@@ -184,17 +342,14 @@ def _infer_suite_name(dir_name: str) -> str:
 
             obj_cnts = v.get("OBJ_CNTS", {})
             if isinstance(obj_cnts, dict):
-                obj_parts = [f"{obj}:{cnt}" for obj, cnt in obj_cnts.items()]
-                line += ";".join(obj_parts)
-            if not is_last:
-                line += ","
+                obj_parts = [f"{obj}:{cnt};" for obj, cnt in obj_cnts.items()]
+                line += "".join(obj_parts)
+            line += ","
 
         score_lines.append(line + "\n")
 
-    with open(os.path.join(input_path, "Summary.csv"), mode="w", encoding="utf-8") as f:
-        f.writelines(summary_lines)
-    with open(os.path.join(input_path, "Score.csv"), mode="w", encoding="utf-8") as f:
-        f.writelines(score_lines)
+    _write_text_atomic(os.path.join(input_path, "Summary.csv"), "".join(summary_lines))
+    _write_text_atomic(os.path.join(input_path, "Score.csv"), score_header + "".join(score_lines))
 
     return {
         "summary_path": os.path.join(input_path, "Summary.csv"),
@@ -202,3 +357,18 @@ def _infer_suite_name(dir_name: str) -> str:
         "summary_rows": len(summary_lines),
         "score_rows": len(score_lines),
     }
+
+
+def _main() -> int:
+    """Dispatch the ``__run_eval_dir`` sub-command; return 0 on success, 2 on bad usage."""
+    if len(sys.argv) >= 3 and sys.argv[1] == "__run_eval_dir":
+        overwrite = len(sys.argv) >= 4 and sys.argv[3] == "1"
+        result = _run_eval_result_for_dir_inline(sys.argv[2], overwrite=overwrite)
+        print("__EVAL_RESULT_JSON__" + json.dumps(result, ensure_ascii=False))
+        return 0
+    print("Usage: python -m lib.eval_summary __run_eval_dir <result_dir> <overwrite:0|1>", file=sys.stderr)
+    return 2
+
+
+if __name__ == "__main__":
+    raise SystemExit(_main())
diff --git a/evaluation_dashboard_app/lib/evaluator_api.py b/evaluation_dashboard_app/lib/evaluator_api.py
new file mode 100644
index 0000000..726a369
--- /dev/null
+++ b/evaluation_dashboard_app/lib/evaluator_api.py
@@ -0,0 +1,636 @@
+"""
+Evaluator API wrapper for job scheduling and status polling.
+Based on evaluator_run_api.py from EvaluatorRunnerUITest, extended with polling support.
+"""
+
+from __future__ import annotations
+
+import json
+import time
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any, Callable, Optional
+
+import requests
+import webautoauth.requests
+from requests.adapters import HTTPAdapter
+from urllib3.util import Retry
+
+EVALUATION_API_BASE_URL = "https://evaluation.ci.web.auto/v3"
+EVALUATION_REPORT_BASE_URL = "https://evaluation.tier4.jp/evaluation/reports"
+DEFAULT_WEBAUTO_AUTH_PATH = Path.home() / ".webauto" / "auth.toml"
+SUCCESS_JOB_STATUSES = frozenset({"succeeded", "success"})
+FAILED_JOB_STATUSES = frozenset(
+    {
+        "failed",
+        "failure",
+        "error",
+        "canceled",
+        "cancelled",
+        "aborted",
+        "timed_out",
+        "timeout",
+    }
+)
+TERMINAL_JOB_STATUSES = SUCCESS_JOB_STATUSES | FAILED_JOB_STATUSES
+_TEST_STATUS_PATHS = (("test", "status"),)
+_OVERALL_STATUS_PATHS = (
+    ("job", "status"),
+    ("evaluation", "status"),
+    ("status",),
+    ("state",),
+)
+_BUILD_STATUS_PATHS = (("build", "status"),)
+
+
+@dataclass(frozen=True)
+class TestCaseDefinition:
+    test_id: str
+    project_id: str
+    catalog_id: str
+    integration_id: str
+    suite_ids: list[str]
+    catalog_display_name_prefix: str = ""
+
+
+class EvaluationAPIError(RuntimeError):
+    """Raised when the evaluation API returns an unexpected response."""
+
+
+def normalize_job_status(status: Any) -> str:
+    """Return ``status`` as a stripped, lower-cased string; ``None`` becomes ``""``."""
+    text = "" if status is None else str(status)
+    return text.strip().lower()
+
+
+def _get_first_status(report: dict[str, Any], paths: tuple[tuple[str, ...], ...]) -> str:
+    """Return the first non-empty normalized status found along ``paths``, else ``""``."""
+
+    def _dig(keys: tuple[str, ...]) -> Any:
+        node: Any = report
+        for key in keys:
+            if not isinstance(node, dict):
+                return None
+            node = node.get(key)
+        return node
+
+    # Lazily evaluate each path so the lookup stops at the first hit.
+    candidates = (normalize_job_status(_dig(keys)) for keys in paths)
+    return next((found for found in candidates if found), "")
+
+
+def extract_job_status(report: dict[str, Any]) -> str:
+    """Return the best evaluator status from known report response shapes."""
+    if not isinstance(report, dict):
+        return "unknown"
+
+    test_status = _get_first_status(report, _TEST_STATUS_PATHS)
+    if test_status:
+        return test_status
+
+    overall_status = _get_first_status(report, _OVERALL_STATUS_PATHS)
+    if overall_status:
+        return overall_status
+
+    build_status = _get_first_status(report, _BUILD_STATUS_PATHS)
+    if build_status:
+        return f"build:{build_status}"
+
+    return "unknown"
+
+
+def is_terminal_job_status(status: Any) -> bool:
+    return normalize_job_status(status) in TERMINAL_JOB_STATUSES
+
+
+def is_success_job_status(status: Any) -> bool:
+    return normalize_job_status(status) in SUCCESS_JOB_STATUSES
+
+
+def get_job_completion(report: dict[str, Any]) -> tuple[bool, str]:
+    """
+    Return (is_completed, status) for an evaluator job report.
+
+    Build success only means the build phase is done; evaluator jobs can still be
+    running suites/tests after that. Build failure is terminal because tests cannot
+    proceed, but build success must not unlock downloads by itself.
+    """
+    if not isinstance(report, dict):
+        return False, "unknown"
+
+    status = extract_job_status(report)
+    test_status = _get_first_status(report, _TEST_STATUS_PATHS)
+    if test_status:
+        return is_terminal_job_status(test_status), status
+
+    overall_status = _get_first_status(report, _OVERALL_STATUS_PATHS)
+    if overall_status and is_terminal_job_status(overall_status):
+        return True, status
+
+    build_status = _get_first_status(report, _BUILD_STATUS_PATHS)
+    if build_status in FAILED_JOB_STATUSES:
+        return True, status
+
+    return False, status
+
+
+def load_test_cases(path: Path | str) -> dict[str, dict[str, Any]]:
+    path = Path(path)
+    with path.open("r", encoding="utf-8") as file:
+        return json.load(file)
+
+
+def resolve_test_case(test_id: str, source: Any) -> TestCaseDefinition:
+    test_cases = normalize_test_case_mapping(source)
+    if test_id not in test_cases:
+        raise KeyError(f"Unknown test_id: {test_id}")
+    data = test_cases[test_id]
+    return make_test_case_definition(test_id, data)
+
+
+def make_test_case_definition(test_id: str, data: dict[str, Any]) -> TestCaseDefinition:
+    return TestCaseDefinition(
+        test_id=test_id,
+        project_id=data["project_id"],
+        catalog_id=data["catalog_id"],
+        integration_id=data["integration_id"],
+        suite_ids=list(data.get("suite_ids", [])),
+        catalog_display_name_prefix=data.get("catalog_display_name_prefix", ""),
+    )
+
+
+def normalize_test_case_mapping(source: Any) -> dict[str, dict[str, Any]]:
+    """Normalize a test-case source into a mapping keyed by test_id."""
+    if isinstance(source, dict):
+        return source
+    if isinstance(source, (str, Path)):
+        return load_test_cases(Path(source))
+    raise TypeError("test case source must be a dict or JSON file path")
+
+
+def normalize_test_case_definition(
+    test_case: Any, *, test_id: str = "custom"
+) -> TestCaseDefinition:
+    """Normalize one test case definition."""
+    if isinstance(test_case, TestCaseDefinition):
+        return test_case
+    if isinstance(test_case, dict):
+        return make_test_case_definition(test_id, test_case)
+    raise TypeError("test_case must be a TestCaseDefinition or dict")
+
+
+def get_job_report_url(project_id: str, job_id: str) -> str:
+    return f"{EVALUATION_REPORT_BASE_URL}/{job_id}/?project_id={project_id}"
+
+
+def get_suite_report_url(project_id: str, job_id: str, suite_report_id: str) -> str:
+    return f"{EVALUATION_REPORT_BASE_URL}/{job_id}/tests/{suite_report_id}?project_id={project_id}"
+
+
+def extract_job_id(url: str) -> str:
+    """Return the path segment after ``/reports/``; input without it is returned as-is."""
+    if "/reports/" not in url:
+        return url
+    # Keep only the text between the first and (if any) second "/reports/" marker.
+    tail = url.split("/reports/")[1]
+    # Cut at the first "/" and then at the first "?" to isolate the id.
+    return tail.split("/")[0].split("?")[0]
+
+
+def extract_project_id(url: str) -> str:
+    """Return the ``project_id`` query value of ``url``; the input itself if absent."""
+    _, marker, tail = url.partition("project_id=")
+    return tail.split("&")[0].split("#")[0] if marker else url
+
+
+def _make_session(auth_path: Path | str | None = DEFAULT_WEBAUTO_AUTH_PATH):
+    """Return (token-authenticated session, plain retrying session, JSON headers) for the evaluation API."""
+    headers = {
+        "Content-Type": "application/json",
+        "accept": "application/json",
+    }
+    if auth_path is not None:
+        auth_path = Path(auth_path).expanduser().resolve()
+        if not auth_path.exists():
+            raise FileNotFoundError(f"webauto auth config not found: {auth_path}")
+    from webautoauth.token import HttpService, TokenSource, load_config
+
+    config = load_config()  # NOTE(review): auth_path is only existence-checked above and is not passed here — confirm load_config() reads that same file
+    token_source = TokenSource(HttpService(config))
+    session = webautoauth.requests.make_session(token_source)
+    presigned = requests.Session()  # separate plain session; no token source is attached to it
+    retries = Retry(total=5, backoff_factor=1, status_forcelist=[500, 502, 503, 504])  # retry only on these 5xx responses
+    presigned.mount("http://", HTTPAdapter(max_retries=retries))
+    presigned.mount("https://", HTTPAdapter(max_retries=retries))
+    return session, presigned, headers
+
+
+def get_evaluator_session(environment: str = "default"):
+    """Public API: same session as worker. Returns (session, presigned, headers)."""
+    import os
+    os.environ["AUTH_PROFILE"] = environment  # process-wide side effect: stays set after this call; presumably read by webautoauth — TODO confirm
+    return _make_session()
+
+
+class EvaluationRunAPI:
+    """Minimal wrapper for scheduling evaluation jobs and collecting reports."""
+
+    def __init__(
+        self,
+        api_base_url: str = EVALUATION_API_BASE_URL,
+        *,
+        auth_path: Path | str | None = DEFAULT_WEBAUTO_AUTH_PATH,
+        test_cases: Optional[dict[str, dict[str, Any]]] = None,
+    ) -> None:
+        self.api_base_url = api_base_url.rstrip("/")
+        self._session, self._presigned, self._headers = _make_session(auth_path)
+        self.test_cases = test_cases or {}
+
+    def request(self, url: str, params: Optional[dict[str, Any]] = None, method: str = "GET"):
+        if method == "GET":
+            from urllib.parse import urlencode
+            if params:
+                return self._session.get(f"{url}?{urlencode(params)}", headers=self._headers)
+            return self._session.get(url, headers=self._headers)
+
+        if method == "POST":
+            if params is None:
+                return self._session.post(url, headers=self._headers)
+            return self._session.post(
+                url,
+                data=json.dumps(params).encode("utf-8"),
+                headers=self._headers,
+            )
+
+        raise ValueError(f"Unsupported method: {method}")
+
+    def schedule_job(
+        self,
+        *,
+        project_id: str,
+        catalog_id: str,
+        integration_id: Optional[str] = None,
+        target_name: Optional[str] = None,
+        source_job_id: Optional[str] = None,
+        suite_ids: Optional[list[str]] = None,
+        max_retries: int = 1,
+        description: str = "no description",
+        clean_build: bool = False,
+        debug: bool = False,
+        release: bool = False,
+        record_caret: bool = False,
+        log_expiration_time_in_days: float = 14.0,
+        is_tag: bool = False,
+    ) -> dict[str, Any]:
+        if not source_job_id and not target_name:
+            raise ValueError("Either target_name or source_job_id must be provided.")
+        payload = {
+            "build_options": {
+                "clean_build": clean_build,
+                "debug": debug,
+            },
+            "catalog_id": catalog_id,
+            "description": description,
+            "release": release,
+            "suite_ids": suite_ids or [],
+            "test_options": {
+                "max_retries": max_retries,
+                "record_caret": record_caret,
+                "log_expiration_time": int(log_expiration_time_in_days * 24 * 60 * 60),
+            },
+        }
+        if integration_id:
+            payload["integration_id"] = integration_id
+        if source_job_id:
+            payload["source_job_id"] = str(source_job_id)
+        if target_name:
+            payload["source"] = {"git_tag" if is_tag else "git_branch": str(target_name)}
+        if record_caret:
+            payload["build_options"]["developer_option_names"] = [
+                "webauto:ci:caret_enabled"
+            ]
+
+        url = f"{self.api_base_url}/projects/{project_id}/jobs/schedule"
+        response = self.request(url, payload, method="POST")
+        if response is None:
+            raise EvaluationAPIError("No response returned from evaluation API")
+        if response.status_code != 202:
+            raise EvaluationAPIError(
+                f"Failed to schedule job: status={response.status_code}, body={response.text}"
+            )
+        return json.loads(response.content)
+
+    def schedule_job_by_test_id(
+        self,
+        test_id: str,
+        *,
+        target_name: str,
+        test_cases: Any = None,
+        max_retries: int = 1,
+        description: str = "no description",
+        clean_build: bool = False,
+        debug: bool = False,
+        release: bool = False,
+        record_caret: bool = False,
+        log_expiration_time_in_days: float = 14.0,
+        is_tag: bool = False,
+    ) -> dict[str, Any]:
+        if test_cases is None:
+            if not self.test_cases:
+                raise ValueError(
+                    "No test case source provided. Pass `test_cases=...` or use schedule_job()."
+                )
+            source = self.test_cases
+        else:
+            source = test_cases
+
+        test_case = resolve_test_case(test_id, source)
+        return self.schedule_job(
+            project_id=test_case.project_id,
+            catalog_id=test_case.catalog_id,
+            integration_id=test_case.integration_id,
+            target_name=target_name,
+            suite_ids=test_case.suite_ids,
+            max_retries=max_retries,
+            description=description,
+            clean_build=clean_build,
+            debug=debug,
+            release=release,
+            record_caret=record_caret,
+            log_expiration_time_in_days=log_expiration_time_in_days,
+            is_tag=is_tag,
+        )
+
+    def schedule_job_by_definition(
+        self,
+        test_case: TestCaseDefinition | dict[str, Any],
+        *,
+        target_name: str,
+        test_id: str = "custom",
+        max_retries: int = 1,
+        description: str = "no description",
+        clean_build: bool = False,
+        debug: bool = False,
+        release: bool = False,
+        record_caret: bool = False,
+        log_expiration_time_in_days: float = 14.0,
+        is_tag: bool = False,
+    ) -> dict[str, Any]:
+        definition = normalize_test_case_definition(test_case, test_id=test_id)
+        return self.schedule_job(
+            project_id=definition.project_id,
+            catalog_id=definition.catalog_id,
+            integration_id=definition.integration_id,
+            target_name=target_name,
+            suite_ids=definition.suite_ids,
+            max_retries=max_retries,
+            description=description,
+            clean_build=clean_build,
+            debug=debug,
+            release=release,
+            record_caret=record_caret,
+            log_expiration_time_in_days=log_expiration_time_in_days,
+            is_tag=is_tag,
+        )
+
+    def get_job_status(self, project_id: str, job_id: str) -> dict[str, Any]:
+        """Get current job status from the API."""
+        url = f"{self.api_base_url}/projects/{project_id}/jobs/{job_id}/report"
+        response = self.request(url, {})
+        if response is None:
+            raise EvaluationAPIError("No response returned from evaluation API")
+        if response.status_code != 200:
+            raise EvaluationAPIError(
+                f"Failed to get job status: status={response.status_code}, body={response.text}"
+            )
+        return json.loads(response.content)
+
+    def is_job_completed(self, project_id: str, job_id: str) -> tuple[bool, str, dict[str, Any]]:
+        """
+        Check if a job has completed (success or failure).
+        Returns (is_completed, status, report_data).
+        Status is the normalized report status, 'build:<status>' when only a build status is present, or 'unknown'.
+        """
+        report = self.get_job_status(project_id, job_id)
+        
+        is_completed, status = get_job_completion(report)
+        
+        return is_completed, status, report
+
+    def wait_for_job_completion(
+        self,
+        project_id: str,
+        job_id: str,
+        poll_interval: float = 60.0,
+        max_wait_seconds: float = 3600.0 * 24 * 7,  # Default 1 week
+        on_progress: Optional[Callable[[str], None]] = None,
+        on_check: Optional[Callable[[str, float], None]] = None,
+    ) -> dict[str, Any]:
+        """
+        Poll job status until completion or timeout.
+
+        Args:
+            project_id: Project ID
+            job_id: Job ID to wait for
+            poll_interval: Seconds between status checks (default 60s)
+            max_wait_seconds: Maximum seconds to wait (default 1 week)
+            on_progress: Callback for progress messages (receives message string)
+            on_check: Callback after each check (receives status string, elapsed seconds)
+
+        Returns:
+            Final job report dict
+
+        Raises:
+            EvaluationAPIError: If max_wait_seconds elapses first. Errors raised while
+                checking are reported via on_progress and polling continues.
+        """
+        # Monotonic clock: elapsed time must not jump when the wall clock is adjusted.
+        start_time = time.monotonic()
+        last_status = "unknown"
+
+        if on_progress:
+            on_progress(f"Waiting for evaluator job {job_id} to complete...")
+
+        while True:
+            elapsed = time.monotonic() - start_time
+            if elapsed > max_wait_seconds:
+                raise EvaluationAPIError(
+                    f"Timeout waiting for job {job_id} after {elapsed:.0f}s"
+                    f" (last status: {last_status})"
+                )
+
+            try:
+                is_completed, status, report = self.is_job_completed(project_id, job_id)
+                status_changed = status != last_status
+                last_status = status
+                if on_check:
+                    on_check(status, elapsed)
+                if is_completed:
+                    if on_progress:
+                        on_progress(f"Job {job_id} completed with status: {status}")
+                    return report
+
+                # Report during the first minute, on status change, and about every 5 minutes.
+                periodic = elapsed < 60 or int(elapsed) % 300 < poll_interval
+                if on_progress and (status_changed or periodic):
+                    on_progress(
+                        f"Job {job_id} status: {status} (elapsed: {elapsed/3600:.1f}h)"
+                    )
+            except Exception as e:
+                if on_progress:
+                    on_progress(f"Error checking job status: {e}")
+                # Continue polling on transient errors
+
+            time.sleep(poll_interval)
+
+    def get_report_list(
+        self,
+        project_id: str,
+        *,
+        status: str = "all",
+        max_results: Optional[int] = None,
+        catalog_id: Optional[str] = None,
+    ) -> list[dict[str, Any]]:
+        """List job reports page by page; at most ``max_results`` entries when given."""
+        reports: list[dict[str, Any]] = []
+        next_token = ""
+        url = f"{self.api_base_url}/projects/{project_id}/jobs/reports"
+        while True:
+            # One request per page; the token from the previous page selects the next.
+            params = {"next_token": next_token, "size": 100, "status": status}
+            if catalog_id is not None:
+                params["catalog_id"] = catalog_id
+
+            response = self.request(url, params)
+            if response is None:
+                raise EvaluationAPIError("No response returned from evaluation API")
+            if response.status_code != 200:
+                raise EvaluationAPIError(
+                    f"Failed to fetch report list: status={response.status_code}, body={response.text}"
+                )
+
+            data = json.loads(response.content)
+            reports.extend(data.get("reports") or [])
+            # A missing or null token both mean "no more pages".
+            next_token = data.get("next_token") or ""
+            # Enforce the cap before returning so the final page is truncated too.
+            if max_results is not None and len(reports) >= max_results:
+                return reports[:max_results]
+            if not next_token:
+                return reports
+
+    def search_report_list(
+        self,
+        project_id: str,
+        *,
+        filters: Optional[list[dict[str, Any]]] = None,
+        sort: Optional[list[dict[str, Any]]] = None,
+        next_token: str = "",
+        size: int = 100,
+    ) -> dict[str, Any]:
+        url = f"{self.api_base_url}/projects/{project_id}/jobs/reports/search"
+        payload: dict[str, Any] = {
+            "size": max(1, min(int(size), 100)),
+        }
+        if next_token:
+            payload["next_token"] = next_token
+        if filters:
+            payload["filters"] = filters
+        if sort:
+            payload["sort"] = sort
+
+        response = self.request(url, payload, method="POST")
+        if response is None:
+            raise EvaluationAPIError("No response returned from evaluation API")
+        if response.status_code != 200:
+            raise EvaluationAPIError(
+                f"Failed to search report list: status={response.status_code}, body={response.text}"
+            )
+        return json.loads(response.content)
+
+    def get_suite_reports(self, project_id: str, job_id: str) -> list[dict[str, Any]]:
+        return self._get_paginated_reports(
+            f"{self.api_base_url}/projects/{project_id}/jobs/{job_id}/test/suite/reports"
+        )
+
+    def get_spec_reports(self, project_id: str, job_id: str) -> list[dict[str, Any]]:
+        return self._get_paginated_reports(
+            f"{self.api_base_url}/projects/{project_id}/jobs/{job_id}/test/spec/reports"
+        )
+
+    def get_case_reports(self, project_id: str, job_id: str) -> list[dict[str, Any]]:
+        return self._get_paginated_reports(
+            f"{self.api_base_url}/projects/{project_id}/jobs/{job_id}/test/case/reports"
+        )
+
+    def get_build_reports(self, project_id: str, job_id: str) -> dict[str, Any]:
+        url = f"{self.api_base_url}/projects/{project_id}/jobs/{job_id}/build/reports"
+        response = self.request(url, {})
+        if response is None:
+            raise EvaluationAPIError("No response returned from evaluation API")
+        if response.status_code != 200:
+            raise EvaluationAPIError(
+                f"Failed to fetch build reports: status={response.status_code}, body={response.text}"
+            )
+        return json.loads(response.content)
+
+    def get_job_report(self, project_id: str, job_id: str) -> dict[str, Any]:
+        url = f"{self.api_base_url}/projects/{project_id}/jobs/{job_id}/report"
+        response = self.request(url, {})
+        if response is None:
+            raise EvaluationAPIError("No response returned from evaluation API")
+        if response.status_code != 200:
+            raise EvaluationAPIError(
+                f"Failed to fetch job report: status={response.status_code}, body={response.text}"
+            )
+        return json.loads(response.content)
+
+    def get_suite_summary(
+        self,
+        project_id: str,
+        job_id: str,
+        *,
+        use_available_case_results: bool = False,
+    ) -> list[dict[str, Any]]:
+        mode = "available_case_results" if use_available_case_results else "case_results"
+        summaries: list[dict[str, Any]] = []
+        for suite_report in self.get_suite_reports(project_id, job_id):
+            if mode not in suite_report:
+                continue
+
+            result = suite_report[mode]
+            cancellation_count = result.get("cancellation_count", 0)
+            summaries.append(
+                {
+                    "name": suite_report["suite"]["display_name"],
+                    "all": result["total_count"] + cancellation_count,
+                    "success": result["success_count"],
+                    "fail": result["failure_count"] + cancellation_count,
+                    "cancel": cancellation_count,
+                    "simulation": suite_report["simulation"]["name"],
+                    "url": get_suite_report_url(project_id, job_id, suite_report["id"]),
+                }
+            )
+        return summaries
+
+    def _get_paginated_reports(self, url: str) -> list[dict[str, Any]]:
+        """Collect the ``reports`` arrays of every page served at ``url``."""
+        reports: list[dict[str, Any]] = []
+        next_token = ""
+        while True:
+            params = {"next_token": next_token, "size": 100}
+            response = self.request(url, params)
+            if response is None:
+                raise EvaluationAPIError("No response returned from evaluation API")
+            if response.status_code != 200:
+                raise EvaluationAPIError(
+                    f"Failed to fetch paginated reports: status={response.status_code}, body={response.text}"
+                )
+
+            data = json.loads(response.content)
+            reports.extend(data.get("reports") or [])
+            # Treat a null token like a missing one; otherwise ``None == ""`` is
+            # False and the loop would keep requesting pages with next_token=None.
+            next_token = data.get("next_token") or ""
+            if not next_token:
+                return reports
diff --git a/evaluation_dashboard_app/lib/mermaid_render.py b/evaluation_dashboard_app/lib/mermaid_render.py
new file mode 100644
index 0000000..47a72d1
--- /dev/null
+++ b/evaluation_dashboard_app/lib/mermaid_render.py
@@ -0,0 +1,30 @@
+"""Render Mermaid diagrams in Streamlit via Mermaid.js (Streamlit markdown does not run Mermaid)."""
+
+import json
+import uuid
+
+import streamlit.components.v1 as components
+
+
+def render_mermaid(definition: str, *, height: int = 480) -> None:
+    """Render a Mermaid diagram inside an HTML iframe (CDN script); "<" is escaped so the text cannot close the script tag."""
+    defn_json = json.dumps(definition.strip()).replace("<", "\\u003c")
+    uid = uuid.uuid4().hex[:12]
+    html = f"""
+<div id="mermaid-host-{uid}" style="overflow:auto;max-width:100%;padding:0.25rem 0;"></div>
+<script src="https://cdn.jsdelivr.net/npm/mermaid@10.9.0/dist/mermaid.min.js"></script>
+<script>
+(function() {{
+  const defn = {defn_json};
+  const host = document.getElementById("mermaid-host-{uid}");
+  mermaid.initialize({{ startOnLoad: false, theme: "neutral", securityLevel: "loose" }});
+  const graphId = "mermaid-graph-{uid}";
+  mermaid.render(graphId, defn).then(function(res) {{
+    host.innerHTML = res.svg;
+  }}).catch(function(err) {{
+    host.textContent = "Mermaid diagram could not be rendered: " + String(err);
+  }});
+}})();
+</script>
+"""
+    components.html(html, height=height, scrolling=True)
diff --git a/evaluation_dashboard_app/lib/overview_pdf_report.py b/evaluation_dashboard_app/lib/overview_pdf_report.py
new file mode 100644
index 0000000..1547f55
--- /dev/null
+++ b/evaluation_dashboard_app/lib/overview_pdf_report.py
@@ -0,0 +1,2181 @@
+from __future__ import annotations
+
+import io
+import html
+from datetime import datetime
+from pathlib import Path
+from typing import Any, Callable, Dict, Iterable, List, Optional, Sequence, Tuple
+
+import duckdb
+import pandas as pd
+import plotly.express as px
+import plotly.graph_objects as go
+
+from lib.score_schema import (
+    SCORE_BLOCK_SIZE,
+    SCORE_NUM_COLS,
+    SCORE_VIEW_METRIC_COLS,
+    build_score_view,
+    infer_score_criteria_count,
+    score_identity_cols,
+)
+from lib.summary_compare import build_summary_delta
+
+PRODUCT_LABEL_JA_DEFAULT = {
+    "Occlusion-Case": "遮蔽ケース",
+    "False-Positive-Grass": "草誤検知（草停止）",
+    "False-Positive-Ground": "地面誤検知",
+    "False-Positive-Splash": "水しぶき 誤検知",
+    "False-Positive-Exhaust-Fog": "排ガス・霧 誤検知",
+    "Missed-Detection-Animal": "動物ロスト（犬）",
+    "Missed-Detection-Falling-Object": "落下物未検知",
+    "Missed-Detection-Pedestrian-Child": "歩行者未検知：子供",
+    "Missed-Detection-Pedestrian-Umbrella": "歩行者未検知：傘",
+    "Missed-Detection-Pedestrian-Crouching": "歩行者未検知：しゃがむ",
+    "Missed-Detection-Pedestrian-Near-Structure": "歩行者未検知：構造物に近い",
+    "False-Positive-Truck": "トラック誤検知",
+    "Pose-Estimation-Yaw-Error": "Yawおかしい",
+    "Long-Range-Detection-Failure": "遠方見えない",
+    "Ghost-Object": "ミサイル",
+    "Sudden-Fast-Vehicle-Ghost": "高速車両の突然出現・急ブレーキ誘発",
+    "Misclassification-Structure-Grass-as-Pedestrian": "構造物・草を人に誤検知",
+    "Misclassification-Structure-Grass-as-Vehicle": "構造物・草を車両に誤検知",
+    "Misclassification-Bike-Motorcycle": "自転車・バイクのミスラベル",
+    "Missed-Detection-Unridden-Bike": "人の乗ってないバイク自転車ロスト",
+    "Missed-Detection-Traffic-Cone": "カラーコーンが認識できない",
+    "Missed-Detection-Other": "その他ロスト",
+}
+
+_COMPARE_RUN_COLORS = ["#312e81", "#0f766e", "#e86a33", "#6b8e23", "#9b59b6", "#1abc9c"]  # per-run colours for the criteria compare charts, cycled by run index
+_OVERVIEW_COMPARE_COLORS = ["#31356E", "#008E9B", "#E86A33", "#6B8E23", "#9B59B6", "#1ABC9C"]  # per-run bar colours for the "TP mean by label" charts, cycled by run index
+_CRITERIA_COLS = SCORE_VIEW_METRIC_COLS  # module-private alias of the lib.score_schema constant
+_NUM_COLS = SCORE_NUM_COLS  # module-private alias of the lib.score_schema constant
+_BLOCK_SIZE = SCORE_BLOCK_SIZE  # module-private alias of the lib.score_schema constant
+_DEFAULT_MAX_EVAL_RANGE = 50
+_DISTANCE_BIN_CASE = """CASE
+    WHEN dist_h < 10 THEN '[0,10)'
+    WHEN dist_h < 20 THEN '[10,20)'
+    WHEN dist_h < 30 THEN '[20,30)'
+    WHEN dist_h < 40 THEN '[30,40)'
+    WHEN dist_h < 50 THEN '[40,50)'
+    WHEN dist_h < 60 THEN '[50,60)'
+    WHEN dist_h < 70 THEN '[60,70)'
+    WHEN dist_h < 80 THEN '[70,80)'
+    WHEN dist_h < 90 THEN '[80,90)'
+    WHEN dist_h < 100 THEN '[90,100)'
+    WHEN dist_h < 110 THEN '[100,110)'
+    WHEN dist_h < 120 THEN '[110,120)'
+    WHEN dist_h < 130 THEN '[120,130)'
+    WHEN dist_h < 140 THEN '[130,140)'
+    WHEN dist_h < 150 THEN '[140,150)'
+    ELSE '[150,inf)'
+END"""
+
+
+def make_report_filename(
+    run_names: Sequence[str],
+    *,
+    now: Optional[datetime] = None,
+    prefix: str = "overview_report",
+) -> str:
+    """Return ``{prefix}_{slug}_{YYYYmmdd_HHMMSS}.pdf``; the slug is ``_slugify`` of the first run name, or of "report"."""
+    stamp = (now or datetime.now()).strftime("%Y%m%d_%H%M%S")
+    return f"{prefix}_{_slugify(run_names[0] if run_names else 'report')}_{stamp}.pdf"
+
+
+def build_overview_pdf_report(
+    *,
+    mode: str,
+    run_records: Sequence[dict],
+    run_labels: Sequence[str],
+    filters: Optional[dict] = None,
+    product_label_map: Optional[dict] = None,
+    generated_at: Optional[datetime] = None,
+    progress_callback: Optional[Callable[[str], None]] = None,
+) -> bytes:
+    """Build the multi-section overview PDF and return its bytes.
+
+    Args:
+        mode: ``"Compare Mode"`` prints "Compare mode" on the cover; anything else prints "Single mode".
+        run_records: One dict per run; ``"path"`` is read here and the records go to the section builders.
+        run_labels: Display label per run, paired positionally with ``run_records``.
+        filters: Optional dict whose ``"perception_labels"``/``"product_labels"`` are summarised on the cover.
+        product_label_map: Product label display map; a falsy value selects ``PRODUCT_LABEL_JA_DEFAULT``.
+        generated_at: Cover timestamp; defaults to ``datetime.now()``.
+        progress_callback: Optional callable that receives a short status message per stage.
+
+    Raises:
+        RuntimeError: When ``_ensure_reportlab_available()`` returns an error message.
+    """
+    reportlab_import_error = _ensure_reportlab_available()
+    if reportlab_import_error is not None:
+        raise RuntimeError(reportlab_import_error)
+
+    from reportlab.lib import colors
+    from reportlab.lib.enums import TA_LEFT
+    from reportlab.lib.pagesizes import A4
+    from reportlab.lib.styles import ParagraphStyle, getSampleStyleSheet
+    from reportlab.lib.units import inch
+    from reportlab.lib.utils import ImageReader
+    from reportlab.platypus import PageBreak, Paragraph, SimpleDocTemplate, Spacer
+
+    product_label_map = product_label_map or PRODUCT_LABEL_JA_DEFAULT
+    generated_at = generated_at or datetime.now()
+    filters = filters or {}
+
+    def _notify(message: str) -> None:
+        if progress_callback is not None:
+            progress_callback(message)
+
+    styles = getSampleStyleSheet()
+    title_style = ParagraphStyle(
+        "ReportTitle",
+        parent=styles["Title"],
+        fontSize=22,
+        leading=28,
+        alignment=TA_LEFT,
+        textColor=colors.HexColor("#0f172a"),
+    )
+    section_style = ParagraphStyle(
+        "SectionHeader",
+        parent=styles["Heading1"],
+        fontSize=16,
+        leading=21,
+        spaceAfter=8,
+        textColor=colors.HexColor("#0f172a"),
+    )
+    body_style = ParagraphStyle(
+        "Body",
+        parent=styles["BodyText"],
+        fontSize=10.5,
+        leading=14,
+        textColor=colors.HexColor("#334155"),
+    )
+    caption_style = ParagraphStyle(
+        "Caption",
+        parent=styles["BodyText"],
+        fontSize=9,
+        leading=12,
+        textColor=colors.HexColor("#475569"),
+    )
+
+    buffer = io.BytesIO()
+    doc = SimpleDocTemplate(
+        buffer,
+        pagesize=A4,
+        rightMargin=0.55 * inch,
+        leftMargin=0.55 * inch,
+        topMargin=0.55 * inch,
+        bottomMargin=0.55 * inch,
+        title="Overview PDF Report",
+    )
+    content_width = doc.width
+    story: List[Any] = []
+    _notify("Preparing cover and active filter summary")
+
+    run_names = [Path(str(r.get("path", ""))).name or f"Run {lbl}" for r, lbl in zip(run_records, run_labels)]
+    story.extend(
+        [
+            Paragraph("Evaluation Dashboard Report", title_style),
+            Spacer(1, 8),
+            Paragraph(
+                f"Generated {generated_at.strftime('%Y-%m-%d %H:%M:%S')} · "
+                f"{'Compare mode' if mode == 'Compare Mode' else 'Single mode'}",
+                body_style,
+            ),
+            Spacer(1, 8),
+            _styled_table(
+                [["Run", "Label", "Directory"]] + [
+                    [f"Run {lbl}", name, str(record.get("path", ""))]
+                    for record, lbl, name in zip(run_records, run_labels, run_names)
+                ],
+                content_width,
+            ),
+            Spacer(1, 12),
+            Paragraph(
+                f"Perception labels: {_summarize_filter_values(filters.get('perception_labels'))}<br/>"
+                f"Product labels: {_summarize_filter_values(filters.get('product_labels'))}",
+                body_style,
+            ),
+            Spacer(1, 16),
+        ]
+    )
+
+    _notify("Building Overview section")
+    overview_section = _build_overview_section(run_records, run_labels, product_label_map)
+    _notify("Building TP Summary section")
+    tp_section = _build_tp_summary_section(run_records, run_labels, product_label_map)
+    _notify("Building Criteria Based Score section")
+    criteria_section = _build_criteria_section(run_records, run_labels)
+    _notify("Building Detection Stats section")
+    detection_section = _build_detection_section(run_records, run_labels)
+
+    sections = [
+        ("Overview", overview_section),
+        ("TP Summary", tp_section),
+        ("Criteria Based Score", criteria_section),
+        ("Detection Stats", detection_section),
+    ]
+
+    available_sections = 0
+    for idx, (title, payload) in enumerate(sections):
+        story.append(Paragraph(title, section_style))
+        story.append(Paragraph(payload["summary"], body_style))
+        story.append(Spacer(1, 8))
+        for flowable in payload.get("flowables") or []:
+            story.append(flowable)
+            story.append(Spacer(1, 8))
+        section_tables = payload.get("tables") or []
+        for table in section_tables:
+            story.append(_styled_table(table, content_width))
+            story.append(Spacer(1, 8))
+        figs = payload.get("figures") or []
+        exported_any_fig = False
+        for fig, caption in figs:
+            try:
+                story.append(_plotly_figure_to_image(fig, content_width, ImageReader))
+                story.append(Spacer(1, 4))
+                story.append(Paragraph(caption, caption_style))
+                exported_any_fig = True
+            except Exception as exc:
+                # Paragraph text is parsed as markup, so the raw exception message must be escaped.
+                story.append(
+                    Paragraph(f"Chart export unavailable for this figure: {html.escape(str(exc))}", caption_style)
+                )
+            story.append(Spacer(1, 12))
+        if not figs:
+            story.append(Paragraph(payload.get("fallback_note", "Section unavailable."), caption_style))
+            story.append(Spacer(1, 12))
+        # Rendered tables make a section usable even when every chart export failed.
+        if exported_any_fig or section_tables:
+            available_sections += 1
+        if idx != len(sections) - 1:
+            story.append(PageBreak())
+
+    if available_sections == 0:
+        # No section produced a table or an exported chart; point the reader at the inputs.
+        empty_report_note = (
+            "No report sections were available for export. Check Summary.csv, Score.csv, and parquet data for the selected run(s)."
+        )
+        story.append(Paragraph(empty_report_note, body_style))
+
+    def _draw_page_number(canvas, document):
+        canvas.setFont("Helvetica", 9)
+        canvas.setFillColor(colors.HexColor("#64748b"))
+        canvas.drawRightString(document.pagesize[0] - document.rightMargin, 18, f"Page {document.page}")
+
+    _notify("Assembling PDF pages")
+    doc.build(story, onFirstPage=_draw_page_number, onLaterPages=_draw_page_number)
+    _notify("Finalizing PDF bytes")
+    return buffer.getvalue()
+
+
+def _build_tp_summary_section(
+    run_records: Sequence[dict],
+    run_labels: Sequence[str],
+    product_label_map: dict,
+) -> dict:
+    """Assemble the "TP Summary" section payload.
+
+    Only records whose ``"summary"`` entry is not None take part. With one such run the
+    single-run figures are built; with several, the first is the baseline and every later
+    run gets delta figures (or a text placeholder when the delta frame is empty).
+    ``product_label_map`` is accepted but not read in this function.
+    """
+    labelled = [(run_labels[i], r["summary"]) for i, r in enumerate(run_records) if r.get("summary") is not None]
+    if not labelled:
+        return {
+            "summary": "Summary.csv is not available for the selected run set.",
+            "figures": [],
+            "tables": [],
+            "fallback_note": "TP Summary skipped because Summary.csv is missing.",
+        }
+
+    figures: List[Tuple[go.Figure, str]] = []
+    tables: List[list[list[str]]] = []
+
+    # One row of column means per run that has Summary data.
+    metrics_table = [["Run", "Rows", "TP mean", "XRMS mean", "YRMS mean", "XSTD mean", "YSTD mean"]]
+    metrics_table.extend(
+        [lbl, f"{len(df):,}", *(_fmt_number(df[col].mean()) for col in ("TP", "xrms", "yrms", "xstd", "ystd"))]
+        for lbl, df in labelled
+    )
+    tables.append(metrics_table)
+
+    if len(labelled) < 2:
+        figures.extend(_build_tp_default_single_figures(labelled[0][1]))
+    else:
+        baseline_lbl, baseline_df = labelled[0]
+        # Every later run is a candidate compared against the first available run.
+        for cand_lbl, cand_df in labelled[1:]:
+            delta_df = build_summary_delta(baseline_df, cand_df)
+            if not delta_df.empty:
+                figures.extend(_build_tp_default_compare_figures(delta_df, cand_lbl))
+                continue
+            # Empty delta frame: emit a text placeholder so the gap is visible in the PDF.
+            placeholder = _make_text_placeholder_figure(
+                f"No overlapping Summary rows for delta ({cand_lbl} vs {baseline_lbl})."
+            )
+            figures.append(
+                (
+                    placeholder,
+                    f"Delta view is empty because baseline {baseline_lbl} and candidate {cand_lbl} do not share Summary keys.",
+                )
+            )
+
+    return {
+        "summary": "This section follows the default TP Summary page view as closely as possible using the current Overview-selected runs and filters.",
+        "figures": figures,
+        "tables": tables,
+        "fallback_note": "No TP Summary figures were available after filtering.",
+    }
+
+
+def _build_overview_section(
+    run_records: Sequence[dict],
+    run_labels: Sequence[str],
+    product_label_map: dict,
+) -> dict:
+    """Assemble the "Overview" section payload.
+
+    Args:
+        run_records: Run dicts; only those whose ``"summary"`` entry is not None are used.
+        run_labels: Labels indexed in step with ``run_records``.
+        product_label_map: Passed as ``label_jp_map`` when building the product-label chart.
+
+    Returns:
+        Dict with ``summary``, ``flowables``, ``figures``, ``tables`` and ``fallback_note``
+        (no ``flowables`` key when Summary data is missing).
+    """
+    summary_runs = [(run_labels[i], r["summary"]) for i, r in enumerate(run_records) if r.get("summary") is not None]
+    if not summary_runs:
+        return {
+            "summary": "Overview metrics are unavailable because Summary.csv is missing for the selected run set.",
+            "figures": [],
+            "tables": [],
+            "fallback_note": "Overview section skipped because Summary.csv is missing.",
+        }
+
+    # Plain table of per-run column means, followed by the card flowables for the same runs.
+    metric_cols = ("TP", "xrms", "yrms", "xstd", "ystd")
+    metric_card_rows = [["Run", "TP mean", "XRMS", "YRMS", "XSTD", "YSTD"]]
+    metric_card_rows += [
+        [lbl] + [_fmt_number(df[col].mean()) for col in metric_cols] for lbl, df in summary_runs
+    ]
+    tables: List[list[list[str]]] = [metric_card_rows]
+    flowables: List[Any] = list(_build_overview_metric_cards(summary_runs))
+
+    labels = [lbl for lbl, _ in summary_runs]
+    summaries = [df for _, df in summary_runs]
+    # (label column, caption, display-name map) for each optional chart, in output order.
+    chart_specs = (
+        ("perception_label", "Overview page result: TP mean by Perception Label.", None),
+        ("product_label", "Overview page result: TP mean by Product Label.", product_label_map),
+    )
+    figures: List[Tuple[go.Figure, str]] = []
+    for label_col, caption, jp_map in chart_specs:
+        fig = _build_tp_mean_by_label_compare_figure(summaries, labels, label_col, label_jp_map=jp_map)
+        if fig is not None:
+            figures.append((fig, caption))
+
+    return {
+        "summary": "This section mirrors the Overview page first: summary metrics and TP mean by label using the current Overview run selection and label filters.",
+        "flowables": flowables,
+        "figures": figures,
+        "tables": tables,
+        "fallback_note": "Overview figures were unavailable after filtering.",
+    }
+
+
+def _build_overview_metric_cards(summary_runs: Sequence[Tuple[str, pd.DataFrame]]) -> List[Any]:
+    """Return one ReportLab grid table holding a metric card per run, two cards per row.
+
+    Each card shows the run label plus the column means of ``TP``, ``xrms``, ``yrms``,
+    ``xstd`` and ``ystd``. An empty ``summary_runs`` yields ``[]``.
+    """
+    from reportlab.lib import colors
+    from reportlab.platypus import Table, TableStyle
+
+    # (start, stop) cell ranges reused by the style commands below.
+    everything = ((0, 0), (-1, -1))
+    title_row = ((0, 0), (-1, 0))
+    value_rows = ((0, 1), (-1, -1))
+
+    card_cells: List[Any] = []
+    for idx, (lbl, df) in enumerate(summary_runs):
+        accent = _compare_color(idx)
+        means = {col: _fmt_number(df[col].mean()) for col in ("TP", "xrms", "yrms", "xstd", "ystd")}
+        # First row is the coloured title; the remaining rows hold the formatted means.
+        card = Table(
+            [
+                [f"Run {lbl}"],
+                [f"TP mean  {means['TP']}"],
+                [f"XRMS  {means['xrms']}    YRMS  {means['yrms']}"],
+                [f"XSTD  {means['xstd']}    YSTD  {means['ystd']}"],
+            ],
+            colWidths=[220],
+        )
+        card_style = [
+            ("BACKGROUND", *title_row, colors.HexColor(accent)),
+            ("TEXTCOLOR", *title_row, colors.white),
+            ("FONTNAME", *title_row, "Helvetica-Bold"),
+            ("FONTSIZE", *title_row, 11),
+            ("BACKGROUND", *value_rows, colors.HexColor("#f8fafc")),
+            ("TEXTCOLOR", *value_rows, colors.HexColor("#0f172a")),
+            ("FONTNAME", *value_rows, "Helvetica-Bold"),
+            ("FONTSIZE", *value_rows, 10),
+            ("BOX", *everything, 0.8, colors.HexColor("#cbd5e1")),
+            ("ROUNDEDCORNERS", [10, 10, 10, 10]),
+            *((side, *everything, 10) for side in ("LEFTPADDING", "RIGHTPADDING")),
+            *((side, *everything, 8) for side in ("TOPPADDING", "BOTTOMPADDING")),
+        ]
+        card.setStyle(TableStyle(card_style))
+        card_cells.append(card)
+
+    if not card_cells:
+        return []
+
+    cards_per_row = 2
+    grid_rows: List[List[Any]] = []
+    for start in range(0, len(card_cells), cards_per_row):
+        chunk = card_cells[start : start + cards_per_row]
+        # Pad a short final row with empty cells so every grid row has the same width.
+        grid_rows.append(chunk + [""] * (cards_per_row - len(chunk)))
+
+    grid = Table(grid_rows, colWidths=[260, 260], hAlign="LEFT")
+    paddings = (("LEFTPADDING", 0), ("RIGHTPADDING", 12), ("TOPPADDING", 0), ("BOTTOMPADDING", 0))
+    grid_style = [("VALIGN", *everything, "TOP")] + [(name, *everything, pad) for name, pad in paddings]
+    grid.setStyle(TableStyle(grid_style))
+    return [grid]
+
+
+def _build_criteria_section(run_records: Sequence[dict], run_labels: Sequence[str]) -> dict:
+    """Assemble the "Criteria Based Score" payload from runs whose ``"score"`` entry is not None.
+
+    Only criteria block 0 is charted. Every early return yields empty ``figures``/``tables``
+    plus a ``fallback_note`` naming the reason.
+    """
+    score_runs = [(run_labels[i], r) for i, r in enumerate(run_records) if r.get("score") is not None]
+    if not score_runs:
+        return {
+            "summary": "Score.csv is not available for the selected run set.",
+            "figures": [],
+            "tables": [],
+            "fallback_note": "Criteria section skipped because Score.csv is missing.",
+        }
+
+    criteria_count = min(infer_score_criteria_count(rec["score"]) for _, rec in score_runs)
+    if criteria_count <= 0:
+        return {
+            "summary": "Score.csv was loaded, but no criteria blocks were detected.",
+            "figures": [],
+            "tables": [],
+            "fallback_note": "Criteria section skipped because no criteria blocks were found.",
+        }
+
+    criteria_idx = 0
+    views: List[Tuple[str, pd.DataFrame]] = []
+    for lbl, rec in score_runs:
+        df_view = _build_score_view(rec["score"], criteria_idx)
+        if not df_view.empty:
+            df_view["Run"] = lbl
+            views.append((lbl, df_view))
+
+    if not views:
+        return {
+            "summary": "Criteria data was present but could not be shaped into a report view.",
+            "figures": [],
+            "tables": [],
+            "fallback_note": "Criteria section skipped because the selected rows were empty.",
+        }
+
+    # Per-run headline statistics for the selected criteria block.
+    tables = [
+        [["Run", "Rows", "Pass rate mean", "Pass rate median", "NM mean"]]
+        + [
+            [lbl, f"{len(df):,}"]
+            + [_fmt_number(v) for v in (df["pass_rate"].mean(), df["pass_rate"].median(), df["nm"].mean())]
+            for lbl, df in views
+        ]
+    ]
+
+    figures: List[Tuple[go.Figure, str]] = []
+    if len(views) >= 2:
+        figures.extend(_build_criteria_default_compare_figures(views))
+        scenario_table = _build_criteria_compare_table(views)
+    else:
+        figures.extend(_build_criteria_default_single_figures(views[0][1]))
+        scenario_table = _build_criteria_single_table(views[0][1])
+    if scenario_table:
+        tables.append(scenario_table)
+
+    return {
+        "summary": "This section follows the default Criteria Based Score page setup: criteria0, metric=pass_rate, and group_by=GT_OBJ.",
+        "figures": figures,
+        "tables": tables,
+        "fallback_note": "Criteria charts were unavailable for the selected run set.",
+    }
+
+
+def _build_detection_section(run_records: Sequence[dict], run_labels: Sequence[str]) -> dict:
+    """Assemble the "Detection Stats" payload from the first (sorted) ``*.parquet`` file of each run directory.
+
+    Each file is registered through ``_create_eval_flat_view`` on a DuckDB connection that is
+    always closed before returning. Runs without a ``"path"`` or without parquet files are skipped.
+    """
+    parquet_paths: List[Tuple[str, str]] = []
+    for rec, lbl in zip(run_records, run_labels):
+        run_dir = rec.get("path")
+        if not run_dir:
+            continue  # no directory recorded for this run, so there is nothing to scan
+        files = sorted(Path(run_dir).glob("*.parquet"))
+        if files:
+            parquet_paths.append((lbl, str(files[0])))
+
+    if not parquet_paths:
+        return {
+            "summary": "No parquet files were found in the selected run set.",
+            "figures": [],
+            "tables": [],
+            "fallback_note": "Detection Stats skipped because parquet data is missing.",
+        }
+
+    con = duckdb.connect()
+    views: List[Tuple[str, str]] = []
+    try:
+        for idx, (lbl, pq) in enumerate(parquet_paths):
+            view_name = "pdf_eval_flat" if idx == 0 else f"pdf_eval_flat_{idx}"
+            _create_eval_flat_view(con, pq, view_name)
+            views.append((lbl, view_name))
+
+        figures: List[Tuple[go.Figure, str]] = []
+        kpi_rows = [["Run", "TP", "FP", "FN", "TPR", "Precision", "F1"]]
+        for lbl, view in views:
+            kpi = _kpi_row_for_view(con, view)
+            if kpi is None:
+                continue
+            kpi_rows.append(
+                [lbl, f"{kpi['tp']:,}", f"{kpi['fp']:,}", f"{kpi['fn']:,}"]
+                + [_fmt_percent(kpi[key]) for key in ("tpr", "precision", "f1")]
+            )
+        # Keep the KPI table only when at least one run produced a row.
+        tables = [kpi_rows] if len(kpi_rows) > 1 else []
+
+        dataset_rows = [["Run", "Distinct datasets"]]
+        for lbl, view in views:
+            n_ds = con.execute(f"SELECT COUNT(DISTINCT t4dataset_id) FROM {view}").fetchone()[0]
+            dataset_rows.append([lbl, f"{int(n_ds or 0):,}"])
+        if len(dataset_rows) > 1:
+            tables.append(dataset_rows)
+
+        df_status = _query_status_counts(con, views)
+        if not df_status.empty:
+            fig_status = _build_detection_status_figure(df_status)
+            _apply_detection_theme(fig_status, "Detection status distribution by label")
+            figures.append((fig_status, "Stacked TP/FP/FN counts per label from the first parquet file in each selected run."))
+
+        figures.extend(_build_detection_distance_figures(con, views))
+        figures.extend(_build_detection_tpr_figures(con, views))
+        figures.extend(_build_detection_mean_error_figures(con, views))
+        figures.extend(_build_detection_perception_diff_figures(con, views))
+
+        return {
+            "summary": (
+                "This section follows the default Detection Stats view as closely as possible: "
+                "summary KPIs, status distribution, distance panels, TP rate, mean error, and compare-mode perception diff."
+            ),
+            "figures": figures,
+            "tables": tables,
+            "fallback_note": "Detection charts were unavailable for the selected parquet data.",
+        }
+    finally:
+        con.close()
+
+
+def _build_tp_mean_by_label_compare_figure(
+    df_list: Sequence[pd.DataFrame],
+    run_labels: Sequence[str],
+    label_col: str,
+    *,
+    label_jp_map: Optional[dict] = None,
+) -> Optional[go.Figure]:
+    """Return a grouped bar chart of mean ``TP`` per ``label_col`` value, one trace per run.
+
+    Returns None when there are no frames or run labels, when any frame lacks ``label_col``,
+    or when no frame has a non-blank label. ``label_jp_map`` only changes the x tick text.
+    """
+    if not (df_list and run_labels) or label_col not in df_list[0].columns:
+        return None
+    per_run_means = []
+    seen_labels = set()
+    for frame in df_list:
+        if label_col not in frame.columns:
+            return None
+        label_text = frame[label_col].astype(str).str.strip()
+        usable = frame[frame[label_col].notna() & (label_text != "")]
+        means = pd.Series(dtype=float) if usable.empty else usable.groupby(label_col)["TP"].mean()
+        per_run_means.append(means)
+        seen_labels.update(means.index)
+    if not seen_labels:
+        return None
+    ordered = sorted(seen_labels)
+    tick_text = [label_jp_map.get(name, name) for name in ordered] if label_jp_map else ordered
+    traces = []
+    for idx, (means, run_lbl) in enumerate(zip(per_run_means, run_labels)):
+        heights = [means.get(name, float("nan")) for name in ordered]
+        bar_text = [f"{h:.2f}" if pd.notna(h) else "N/A" for h in heights]
+        colour = _OVERVIEW_COMPARE_COLORS[idx % len(_OVERVIEW_COMPARE_COLORS)]
+        traces.append(go.Bar(name=run_lbl, x=tick_text, y=heights, marker=dict(color=colour), text=bar_text, textposition="auto"))
+    axis_name = label_col.replace("_", " ")
+    fig = go.Figure(traces)
+    fig.update_layout(
+        title=f"TP mean by {axis_name}",
+        barmode="group",
+        xaxis_title=axis_name.title(),
+        yaxis_title="TP mean",
+        height=420,
+        margin=dict(t=70, b=55, l=55, r=25),
+        legend_title="Run",
+        template="plotly_white",
+    )
+    return fig
+
+
+def _build_tp_default_single_figures(df: pd.DataFrame) -> List[Tuple[go.Figure, str]]:
+    """Return the single-run TP Summary figures: RMS scatter, velocity scatter, TP histogram and TP violin.
+
+    ``vx``/``vy`` are clipped to their 1st-99th percentile range on a copy; *df* itself is untouched.
+    """
+    clipped = df.copy()
+    if not clipped.empty:
+        for axis in ("vx", "vy"):
+            if axis in clipped.columns:
+                low, high = clipped[axis].quantile([0.01, 0.99]).values
+                clipped[axis] = clipped[axis].clip(low, high)
+
+    # RMS scatter coloured by TP.
+    rms_scatter = px.scatter(
+        clipped, x="xrms", y="yrms", color="TP", hover_data=["id"],
+        labels={"xrms": "X RMS", "yrms": "Y RMS", "TP": "TP"},
+        color_continuous_scale="Viridis",
+    )
+    rms_scatter.update_traces(marker=dict(size=8, opacity=0.7))
+    _apply_tp_clean_theme(rms_scatter)
+
+    # Velocity scatter drawn from the clipped vx/vy columns.
+    velocity_scatter = px.scatter(
+        clipped, x="vx", y="vy", color="TP", hover_data=["id"],
+        labels={"vx": "Vx", "vy": "Vy", "TP": "TP"},
+        color_continuous_scale="Plasma",
+        title="Vx vs Vy",
+    )
+    _apply_tp_clean_theme(velocity_scatter)
+
+    return [
+        (rms_scatter, "Default TP Summary RMS scatter from the selected Summary.csv rows."),
+        (velocity_scatter, "Default TP Summary velocity scatter with outlier clipping enabled."),
+        (_build_tp_distribution_figure(clipped, "TP"), "Default TP distribution view (metric = TP)."),
+        (_build_tp_violin_figure(clipped, "TP"), "Default TP density violin for metric = TP."),
+    ]
+
+
+def _build_tp_default_compare_figures(df_delta: pd.DataFrame, candidate_label: str) -> List[Tuple[go.Figure, str]]:
+    """Build the compare-mode TP Summary figures for one candidate run.
+
+    Args:
+        df_delta: Delta frame; the columns read here are ``id``, ``xrms``, ``yrms``, ``xrms_B``,
+            ``yrms_B``, ``xrms_delta``, ``yrms_delta`` and ``TP_delta``.
+        candidate_label: Label used for the ``*_B`` axis and in the captions.
+
+    Returns:
+        ``(figure, caption)`` pairs in the order X RMS scatter, Y RMS scatter,
+        TP delta histogram, TP delta violin.
+    """
+    tp_col = "TP_delta"
+    caption_prefix = f"TP Summary compare ({candidate_label} vs baseline)"
+
+    def _rms_scatter(axis: str) -> go.Figure:
+        # One scatter per axis: the ``*_B`` column on x, the unsuffixed column on y.
+        own, other = f"{axis}rms", f"{axis}rms_B"
+        axis_name = f"{axis.upper()} RMS"
+        fig = px.scatter(
+            df_delta,
+            x=other,
+            y=own,
+            color=tp_col,
+            hover_data=["id", "xrms_delta", "yrms_delta"],
+            # "A" is the fixed display name given to the unsuffixed column.
+            labels={
+                other: f"{axis_name} ({candidate_label})",
+                own: f"{axis_name} (A)",
+                tp_col: "Delta TP",
+                "xrms_delta": "Delta X RMS",
+                "yrms_delta": "Delta Y RMS",
+            },
+            title=f"Scatter: {axis_name} ({candidate_label}) vs {axis_name} (A)",
+            color_continuous_scale="Viridis",
+        )
+        fig.update_traces(marker=dict(size=8, opacity=0.6))
+        _apply_tp_clean_theme(fig)
+        return fig
+
+    figures: List[Tuple[go.Figure, str]] = [
+        (_rms_scatter("x"), f"{caption_prefix}: X RMS scatter, colored by TP delta."),
+        (_rms_scatter("y"), f"{caption_prefix}: Y RMS scatter, colored by TP delta."),
+    ]
+
+    # Distribution views of the TP delta column itself.
+    figures.append(
+        (
+            _build_tp_distribution_figure(df_delta, tp_col),
+            f"{caption_prefix}: TP delta distribution.",
+        )
+    )
+    figures.append(
+        (
+            _build_tp_violin_figure(df_delta, tp_col),
+            f"{caption_prefix}: TP delta violin.",
+        )
+    )
+
+    return figures
+
+
+def _build_tp_distribution_figure(df: pd.DataFrame, metric: str) -> go.Figure:
+    """Return a histogram of ``df[metric]`` for the report.
+
+    Uses 40 bins, a marginal box plot and a hidden legend; the x axis is titled with *metric*.
+    """
+    histogram = px.histogram(
+        df, x=metric, nbins=40, color_discrete_sequence=["#0d9488"], marginal="box", opacity=0.88
+    )
+    layout = dict(
+        template="plotly_white",
+        showlegend=False,
+        bargap=0.04,
+        xaxis_title=metric,
+        yaxis_title="Count",
+        paper_bgcolor="rgba(248,250,252,0.9)",
+        plot_bgcolor="rgba(255,255,255,0.95)",
+        font=dict(family="system-ui, sans-serif", size=12, color="#334155"),
+        margin=dict(t=36, b=48, l=56, r=28),
+    )
+    histogram.update_layout(**layout)
+    return histogram
+
+
+def _build_tp_violin_figure(df: pd.DataFrame, metric: str) -> go.Figure:
+    """Return a violin plot of ``df[metric]``.
+
+    The plot includes the inner box and all individual points; the legend is hidden.
+    """
+    violin = px.violin(
+        df, y=metric, box=True, points="all", color_discrete_sequence=["#312e81"]
+    )
+    violin.update_layout(
+        template="plotly_white",
+        yaxis_title=metric,
+        showlegend=False,
+        paper_bgcolor="rgba(248,250,252,0.9)",
+        plot_bgcolor="rgba(255,255,255,0.95)",
+        font=dict(family="system-ui, sans-serif", size=12, color="#334155"),
+        margin=dict(t=36, b=48, l=56, r=28),
+    )
+    return violin
+
+
+def _apply_tp_clean_theme(fig: go.Figure) -> None:
+    """Apply the shared TP Summary template, background colours, font and margins to *fig* in place."""
+    theme = dict(
+        template="plotly_white", paper_bgcolor="rgba(248,250,252,0.9)", plot_bgcolor="rgba(255,255,255,0.95)",
+        font=dict(family="system-ui, sans-serif", size=12, color="#334155"),
+        margin=dict(t=48, b=48, l=56, r=28),
+    )
+    fig.update_layout(**theme)
+
+
+def _build_criteria_default_single_figures(df_view: pd.DataFrame) -> List[Tuple[go.Figure, str]]:
+    """Return the single-run criteria figures with their captions.
+
+    Output order: ``pass_rate`` histogram, mean ``pass_rate`` bar per group, ``pass_rate`` box per group.
+    Rows are grouped by ``GT_OBJ`` when that column has any non-null value, otherwise by ``Option``.
+    """
+    metric = "pass_rate"
+    if df_view["GT_OBJ"].notna().any():
+        group_by = "GT_OBJ"
+    else:
+        group_by = "Option"
+    palette = px.colors.qualitative.Bold
+
+    # Row-level distribution, split by group.
+    histogram = px.histogram(
+        df_view, x=metric, color=group_by, nbins=30, marginal="box", color_discrete_sequence=palette
+    )
+    _apply_criteria_theme(histogram, f"{metric} · histogram")
+
+    # Group means, highest first.
+    group_means = df_view.groupby(group_by, as_index=False)[metric].mean()
+    group_means = group_means.sort_values(metric, ascending=False)
+    mean_bar = px.bar(
+        group_means, x=group_by, y=metric, text_auto=".2f", color=group_by, color_discrete_sequence=palette
+    )
+    _apply_criteria_theme(mean_bar, f"Mean {metric}")
+    mean_bar.update_layout(showlegend=False)
+
+    # Per-group spread with every point drawn.
+    spread_box = px.box(
+        df_view, x=group_by, y="pass_rate", points="all", color=group_by, color_discrete_sequence=palette
+    )
+    _apply_criteria_theme(spread_box, "Pass rate by group")
+    spread_box.update_layout(showlegend=False)
+
+    return [
+        (histogram, "Default Criteria page distribution chart for criteria0 and metric = pass_rate."),
+        (mean_bar, f"Default grouped mean chart by {group_by}."),
+        (spread_box, f"Default pass-rate overview by {group_by}."),
+    ]
+
+
+def _build_criteria_default_compare_figures(views: Sequence[Tuple[str, pd.DataFrame]]) -> List[Tuple[go.Figure, str]]:
+    figures: List[Tuple[go.Figure, str]] = []
+    metric = "pass_rate"
+    group_by = "GT_OBJ"
+    run_order = [lbl for lbl, _ in views]
+    combined = pd.concat([df.assign(Run=lbl) for lbl, df in views], ignore_index=True)
+    combined["Run"] = pd.Categorical(combined["Run"], categories=run_order, ordered=True)
+    px_map = {lbl: _COMPARE_RUN_COLORS[i % len(_COMPARE_RUN_COLORS)] for i, (lbl, _) in enumerate(views)}
+
+    fig_hist = px.histogram(
+        combined,
+        x=metric,
+        color="Run",
+        color_discrete_map=px_map,
+        category_orders={"Run": run_order},
+        nbins=30,
+        barmode="overlay",
+        opacity=0.55,
+        marginal="box",
+    )
+    _apply_criteria_theme(fig_hist, f"{metric} · row-level distribution")
+    figures.append((fig_hist, "Default compare overlay view for pass-rate distribution."))
+
+    df_avg = combined.groupby([group_by, "Run"], as_index=False)[metric].mean()
+    obj_means = df_avg.groupby(group_by, as_index=False)[metric].mean().sort_values(metric, ascending=False)
+    obj_order = [x for x in obj_means[group_by].tolist() if x in set(df_avg[group_by])]
+    df_avg[group_by] = pd.Categorical(df_avg[group_by], categories=obj_order, ordered=True)
+    df_avg = df_avg.sort_values([group_by, "Run"])
+    fig_bar = px.bar(
+        df_avg,
+        x=group_by,
+        y=metric,
+        color="Run",
+        color_discrete_map=px_map,
+        category_orders={group_by: obj_order, "Run": run_order},
+        barmode="group",
+        text_auto=".2f",
+    )
+    _apply_criteria_theme(fig_bar, f"Mean {metric} by {group_by}")
+    figures.append((fig_bar, f"Default compare grouped mean view by {group_by}."))
+
+    fig_box = px.box(
+        combined,
+        x=group_by,
+        y="pass_rate",
+        color="Run",
+        color_discrete_map=px_map,
+        category_orders={group_by: obj_order, "Run": run_order},
+        points="all",
+    )
+    _apply_criteria_theme(fig_box, "Pass rate overview")
+    figures.append((fig_box, f"Default compare pass-rate overview by {group_by}."))
+
+    scenario_delta = _build_criteria_compare_delta_figure(views)
+    if scenario_delta is not None:
+        base_l = run_order[0]
+        if len(run_order) == 2:
+            cap = f"Default compare per-scenario delta view for candidate {run_order[1]} vs baseline {base_l}."
+        else:
+            rest = ", ".join(run_order[1:])
+            cap = (
+                f"Default compare per-scenario delta vs baseline {base_l} "
+                f"for candidates {rest} (grouped bars)."
+            )
+        figures.append((scenario_delta, cap))
+    return figures
+
+
+def _build_criteria_single_table(df_view: pd.DataFrame) -> List[List[str]]:
+    key_cols = score_identity_cols(df_view)
+    scenario_metric = df_view.groupby(key_cols, as_index=False)["pass_rate"].mean().sort_values("pass_rate", ascending=False).head(20)
+    rows = [key_cols + ["Pass rate mean"]]
+    for _, row in scenario_metric.iterrows():
+        rows.append([_shorten_scenario_name(str(row[c])) for c in key_cols] + [_fmt_number(row["pass_rate"])])
+    first_w = 0.56 if len(key_cols) > 1 else 0.72
+    rest_w = (1.0 - first_w) / len(key_cols)
+    return {"rows": rows, "col_width_weights": [first_w] + [rest_w] * len(key_cols)}
+
+
+def _build_criteria_compare_table(views: Sequence[Tuple[str, pd.DataFrame]]) -> List[List[str]]:
+    labels = [lbl for lbl, _ in views]
+    key_cols = score_identity_cols(views[0][1])
+    merges = []
+    for lbl, df in views:
+        g = df.groupby(key_cols, as_index=False)["pass_rate"].mean()
+        merges.append(g.rename(columns={"pass_rate": f"pr_{lbl}"}))
+    per_scenario = merges[0]
+    for g in merges[1:]:
+        per_scenario = per_scenario.merge(g, on=key_cols, how="inner")
+    base = labels[0]
+    delta_cols: List[str] = []
+    for cand in labels[1:]:
+        dcol = f"delta_{cand}"
+        per_scenario[dcol] = per_scenario[f"pr_{cand}"] - per_scenario[f"pr_{base}"]
+        delta_cols.append(dcol)
+    rank_key = per_scenario[delta_cols].abs().max(axis=1)
+    per_scenario = per_scenario.reindex(rank_key.sort_values(ascending=False).index).head(20)
+    header: List[str] = key_cols + [f"Pass rate ({base})"]
+    for cand in labels[1:]:
+        header.extend([f"Pass rate ({cand})", f"Δ({cand} - {base})"])
+    rows = [header]
+    for _, row in per_scenario.iterrows():
+        cells: List[str] = [_shorten_scenario_name(str(row[c])) for c in key_cols] + [_fmt_number(row[f"pr_{base}"])]
+        for cand in labels[1:]:
+            cells.extend([_fmt_number(row[f"pr_{cand}"]), _fmt_number(row[f"delta_{cand}"])])
+        rows.append(cells)
+    ncols = len(header)
+    scen_w = 0.28 if ncols > 5 else 0.44
+    rest_w = (1.0 - scen_w) / max(ncols - 1, 1)
+    weights = [scen_w] + [rest_w] * (ncols - 1)
+    return {"rows": rows, "col_width_weights": weights}
+
+
+def _build_criteria_compare_delta_figure(views: Sequence[Tuple[str, pd.DataFrame]]) -> Optional[go.Figure]:
+    if len(views) < 2:
+        return None
+    labels = [lbl for lbl, _ in views]
+    base = labels[0]
+    key_cols = score_identity_cols(views[0][1])
+    merges = []
+    for lbl, df in views:
+        g = df.groupby(key_cols, as_index=False)["pass_rate"].mean()
+        merges.append(g.rename(columns={"pass_rate": f"pr_{lbl}"}))
+    per_scenario = merges[0]
+    for g in merges[1:]:
+        per_scenario = per_scenario.merge(g, on=key_cols, how="inner")
+    if per_scenario.empty:
+        return None
+    long_rows: List[dict] = []
+    delta_cols: List[str] = []
+    for cand in labels[1:]:
+        dcol = f"delta_{cand}"
+        per_scenario[dcol] = per_scenario[f"pr_{cand}"] - per_scenario[f"pr_{base}"]
+        delta_cols.append(dcol)
+    rank_key = per_scenario[delta_cols].abs().max(axis=1)
+    vis = per_scenario.reindex(rank_key.sort_values(ascending=False).index).head(20)
+    if "Dataset" in key_cols:
+        scenario_labels = vis["Scenario"].astype(str) + " [" + vis["Dataset"].astype(str) + "]"
+    else:
+        scenario_labels = vis["Scenario"].astype(str)
+    scen_order = [_shorten_scenario_name(str(s)) for s in scenario_labels.tolist()]
+    for _, row in vis.iterrows():
+        scen_raw = f"{row['Scenario']} [{row['Dataset']}]" if "Dataset" in key_cols else row["Scenario"]
+        scen_disp = _shorten_scenario_name(str(scen_raw))
+        for cand in labels[1:]:
+            long_rows.append(
+                {
+                    "Scenario": scen_disp,
+                    "vs_baseline": f"Δ({cand} - {base})",
+                    "delta": float(row[f"delta_{cand}"]),
+                }
+            )
+    melted = pd.DataFrame(long_rows)
+    if melted.empty:
+        return None
+    legend_order = [f"Δ({cand} - {base})" for cand in labels[1:]]
+    color_map = {
+        leg: _COMPARE_RUN_COLORS[(i + 1) % len(_COMPARE_RUN_COLORS)]
+        for i, leg in enumerate(legend_order)
+    }
+    fig = px.bar(
+        melted,
+        x="Scenario",
+        y="delta",
+        color="vs_baseline",
+        color_discrete_map=color_map,
+        category_orders={"Scenario": scen_order, "vs_baseline": legend_order},
+        barmode="group",
+        text_auto=".2f",
+    )
+    fig.update_layout(coloraxis_showscale=False, legend_title_text="")
+    _apply_criteria_theme(fig, "Pass rate delta by scenario")
+    return fig
+
+
+def _build_detection_status_figure(df_status: pd.DataFrame) -> go.Figure:
+    status_colors = {"TP": "#2d8f47", "FN": "#d73027", "FP": "#E86A33", "TN": "#4A90D9"}
+    if "run" in df_status.columns and df_status["run"].nunique() > 1:
+        fig = px.bar(
+            df_status,
+            x="label",
+            y="num",
+            color="status",
+            barmode="stack",
+            facet_col="run",
+            color_discrete_map=status_colors,
+            title="Status Distribution per Label",
+            labels={"num": "Count", "label": "Label", "status": "Status"},
+        )
+        fig.for_each_annotation(lambda ann: ann.update(text=ann.text.replace("run=", "")))
+        return fig
+    if df_status["label"].nunique() > 6:
+        return px.bar(
+            df_status,
+            y="label",
+            x="num",
+            color="status",
+            barmode="stack",
+            title="Status Distribution per Label",
+            labels={"num": "Count", "label": "Label", "status": "Status"},
+            color_discrete_map=status_colors,
+            orientation="h",
+        )
+    return px.bar(
+        df_status,
+        x="label",
+        y="num",
+        color="status",
+        barmode="stack",
+        title="Status Distribution per Label",
+        labels={"num": "Count", "label": "Label", "status": "Status"},
+        color_discrete_map=status_colors,
+    )
+
+
+def _build_detection_distance_figures(
+    con: duckdb.DuckDBPyConnection,
+    views: Sequence[Tuple[str, str]],
+) -> List[Tuple[go.Figure, str]]:
+    figures: List[Tuple[go.Figure, str]] = []
+    labels = [lbl for lbl, _ in views]
+    if len(views) == 1:
+        df_both = _query_distance_rates_single(con, views[0][1])
+        if not df_both.empty:
+            fig = go.Figure()
+            fig.add_trace(
+                go.Bar(
+                    x=df_both["bin_label"],
+                    y=df_both["tpr"],
+                    name="TP rate",
+                    marker_color=_COMPARE_RUN_COLORS[0],
+                    hovertemplate="%{x}<br>TP rate: %{y:.2%}<extra></extra>",
+                )
+            )
+            fig.add_trace(
+                go.Bar(
+                    x=df_both["bin_label"],
+                    y=df_both["fpr"],
+                    name="FP rate",
+                    marker_color=_COMPARE_RUN_COLORS[2],
+                    hovertemplate="%{x}<br>FP rate: %{y:.2%}<extra></extra>",
+                )
+            )
+            _apply_detection_theme(fig, "TP & FP rate by distance")
+            fig.update_layout(
+                xaxis_title="Distance bin",
+                yaxis_title="Rate",
+                yaxis_range=[0, 1],
+                barmode="group",
+                xaxis=dict(tickangle=-35, categoryorder="array", categoryarray=df_both["bin_label"].tolist()),
+                hovermode="x unified",
+            )
+            fig.add_hline(y=0.5, line_dash="dash", line_color="rgba(0,0,0,0.25)")
+            figures.append((fig, "Detection Stats distance panel in bar-chart mode across the full 0-150+ range."))
+        df_oc = _query_object_counts_single(con, views[0][1])
+        if not df_oc.empty:
+            align_x = sorted(df_oc["bin_label"].unique(), key=_distance_bin_sort_key)
+            pivot_oc = df_oc.pivot_table(index="bin_label", columns="label", values="n", aggfunc="sum", fill_value=0).reindex(align_x, fill_value=0)
+            fig_oc = go.Figure()
+            for j, lab in enumerate(pivot_oc.columns):
+                c = _compare_color(j)
+                fig_oc.add_trace(
+                    go.Bar(
+                        x=align_x,
+                        y=pivot_oc[lab].values,
+                        name=str(lab),
+                        marker_color=c,
+                        hovertemplate=f"{lab}<br>%{{x}}<br>Count: %{{y:.0f}}<extra></extra>",
+                    )
+                )
+            _apply_detection_theme(fig_oc, "Object count by distance bin")
+            fig_oc.update_layout(
+                xaxis_title="Distance bin",
+                yaxis_title="Count",
+                barmode="group",
+                xaxis=dict(tickangle=-35, categoryorder="array", categoryarray=align_x),
+                hovermode="x unified",
+            )
+            figures.append((fig_oc, "Detection Stats object-count-by-distance panel in bar-chart mode across the full 0-150+ range."))
+        return figures
+
+    df_tpr = _query_distance_rates_compare(con, views, metric="tpr")
+    if not df_tpr.empty:
+        fig_tpr = go.Figure()
+        for i, lbl in enumerate(labels):
+            d = df_tpr[df_tpr["run"] == lbl].sort_values("bin_order")
+            c = _compare_color(i)
+            fig_tpr.add_trace(
+                go.Bar(
+                    x=d["bin_label"],
+                    y=d["tpr"],
+                    name=lbl,
+                    marker_color=c,
+                    hovertemplate=f"{lbl}<br>%{{x}}<br>TP rate: %{{y:.2%}}<extra></extra>",
+                )
+            )
+        align_x = df_tpr[df_tpr["run"] == labels[0]].sort_values("bin_order")["bin_label"].tolist()
+        _apply_detection_theme(fig_tpr, "TP rate by distance")
+        fig_tpr.update_layout(
+            xaxis_title="Distance bin",
+            yaxis_title="TP rate",
+            yaxis_range=[0, 1],
+            barmode="group",
+            xaxis=dict(tickangle=-35, categoryorder="array", categoryarray=align_x),
+            hovermode="x unified",
+        )
+        fig_tpr.add_hline(y=0.5, line_dash="dash", line_color="rgba(0,0,0,0.25)")
+        figures.append((fig_tpr, "Detection Stats compare distance panel in bar-chart mode: TP rate by distance."))
+
+    df_fpr = _query_distance_rates_compare(con, views, metric="fpr")
+    if not df_fpr.empty:
+        fig_fpr = go.Figure()
+        for i, lbl in enumerate(labels):
+            d = df_fpr[df_fpr["run"] == lbl].sort_values("bin_order")
+            c = _compare_color(i)
+            fig_fpr.add_trace(
+                go.Bar(
+                    x=d["bin_label"],
+                    y=d["fpr"],
+                    name=lbl,
+                    marker_color=c,
+                    hovertemplate=f"{lbl}<br>%{{x}}<br>FP rate: %{{y:.2%}}<extra></extra>",
+                )
+            )
+        align_x = df_fpr[df_fpr["run"] == labels[0]].sort_values("bin_order")["bin_label"].tolist()
+        _apply_detection_theme(fig_fpr, "FP rate by distance")
+        fig_fpr.update_layout(
+            xaxis_title="Distance bin",
+            yaxis_title="FP rate",
+            yaxis_range=[0, 1],
+            barmode="group",
+            xaxis=dict(tickangle=-35, categoryorder="array", categoryarray=align_x),
+            hovermode="x unified",
+        )
+        fig_fpr.add_hline(y=0.5, line_dash="dash", line_color="rgba(0,0,0,0.25)")
+        figures.append((fig_fpr, "Detection Stats compare distance panel in bar-chart mode: FP rate by distance."))
+
+    df_oc = _query_object_counts_compare(con, views)
+    if not df_oc.empty:
+        align_x = sorted(df_oc["bin_label"].unique(), key=_distance_bin_sort_key)
+        pivot_oc = df_oc.pivot_table(index="bin_label", columns="run", values="n", aggfunc="sum", fill_value=0).reindex(align_x, fill_value=0)
+        fig_oc = go.Figure()
+        for j, rl in enumerate([r for r in labels if r in pivot_oc.columns]):
+            c = _compare_color(j)
+            fig_oc.add_trace(
+                go.Bar(
+                    x=align_x,
+                    y=pivot_oc[rl].values,
+                    name=str(rl),
+                    marker_color=c,
+                    hovertemplate=f"{rl}<br>%{{x}}<br>Count: %{{y:.0f}}<extra></extra>",
+                )
+            )
+        _apply_detection_theme(fig_oc, "Object count by distance bin")
+        fig_oc.update_layout(
+            xaxis_title="Distance bin",
+            yaxis_title="Count",
+            barmode="group",
+            xaxis=dict(tickangle=-35, categoryorder="array", categoryarray=align_x),
+            hovermode="x unified",
+        )
+        figures.append((fig_oc, "Detection Stats compare object-count-by-distance panel in bar-chart mode."))
+    return figures
+
+
+def _build_detection_tpr_figures(
+    con: duckdb.DuckDBPyConnection,
+    views: Sequence[Tuple[str, str]],
+) -> List[Tuple[go.Figure, str]]:
+    figures: List[Tuple[go.Figure, str]] = []
+    labels = [lbl for lbl, _ in views]
+    if len(views) == 1:
+        df_tpr = _query_tpr_by_label(con, views[0][1], _DEFAULT_MAX_EVAL_RANGE)
+        if df_tpr.empty:
+            return figures
+        fig = px.bar(
+            df_tpr,
+            x="label",
+            y="tpr",
+            title=f"Total TP rate within {_DEFAULT_MAX_EVAL_RANGE} [m]",
+            labels={"tpr": "TP Rate", "label": "Label"},
+            color_discrete_sequence=[_COMPARE_RUN_COLORS[0]],
+        )
+        fig.update_traces(marker_color=_COMPARE_RUN_COLORS[0])
+        _apply_detection_theme(fig, f"Total TP rate within {_DEFAULT_MAX_EVAL_RANGE} [m]")
+        fig.update_layout(yaxis_range=[0, 1.2])
+        fig.add_hline(y=0.5, line_dash="dash", line_color="rgba(0,0,0,0.2)")
+        figures.append((fig, "Default Detection Stats TP-rate panel: bar chart per object class."))
+        return figures
+
+    dfs = []
+    for lbl, view_name in views:
+        df = _query_tpr_by_label(con, view_name, _DEFAULT_MAX_EVAL_RANGE)
+        if df.empty:
+            continue
+        df["run"] = lbl
+        dfs.append(df)
+    if not dfs:
+        return figures
+    df_all = pd.concat(dfs, ignore_index=True)
+    cats = sorted(df_all["label"].astype(str).unique())
+    fig = _tpr_spider_compare_figure(df_all, cats, "TP rate (<=50 m)", labels, height=360)
+    figures.append((fig, "Default compare Detection Stats TP-rate panel: spider chart per object class."))
+    return figures
+
+
+def _build_detection_mean_error_figures(
+    con: duckdb.DuckDBPyConnection,
+    views: Sequence[Tuple[str, str]],
+) -> List[Tuple[go.Figure, str]]:
+    figures: List[Tuple[go.Figure, str]] = []
+    labels = [lbl for lbl, _ in views]
+    if not _views_have_error_columns(con, [view for _, view in views]):
+        return figures
+    if len(views) == 1:
+        df = _query_mean_error_by_label(con, views[0][1], _DEFAULT_MAX_EVAL_RANGE)
+        if df.empty:
+            return figures
+        fig = go.Figure()
+        fig.add_trace(go.Bar(x=df["label"], y=df["mean_abs_x_error"], name="X Error", marker_color=_compare_color(0)))
+        fig.add_trace(go.Bar(x=df["label"], y=df["mean_abs_y_error"], name="Y Error", marker_color=_compare_color(1)))
+        fig.add_trace(go.Bar(x=df["label"], y=df["mean_abs_yaw_error"], name="Yaw Error", marker_color=_compare_color(2)))
+        _apply_detection_theme(fig, f"Mean Error within {_DEFAULT_MAX_EVAL_RANGE} [m]")
+        fig.update_layout(xaxis_title="Label", yaxis_title="Error [m] or [rad]", barmode="group")
+        figures.append((fig, "Default Detection Stats mean-error panel: grouped bars for X/Y/Yaw."))
+        return figures
+
+    dfs = []
+    for lbl, view_name in views:
+        df = _query_mean_error_by_label(con, view_name, _DEFAULT_MAX_EVAL_RANGE)
+        if df.empty:
+            continue
+        df["run"] = lbl
+        dfs.append(df)
+    if not dfs:
+        return figures
+    df_err_melt = pd.concat(dfs, ignore_index=True)
+    cats = sorted(df_err_melt["label"].astype(str).unique())
+    err_specs = [
+        ("Mean |x error| (within 50 m)", "mean_abs_x_error", "Mean |x error| (m)", ".3f"),
+        ("Mean |y error| (within 50 m)", "mean_abs_y_error", "Mean |y error| (m)", ".3f"),
+        ("Mean |yaw error| (within 50 m)", "mean_abs_yaw_error", "Mean |yaw error| (rad)", ".4f"),
+    ]
+    for chart_title, col, hover_lbl, tfmt in err_specs:
+        figures.append(
+            (
+                _scalar_metric_spider_compare_figure(df_err_melt, cats, chart_title, labels, col, hover_lbl, height=400, tickformat=tfmt),
+                f"Default compare Detection Stats mean-error panel: spider chart for {hover_lbl}.",
+            )
+        )
+    return figures
+
+
+def _query_distance_rates_single(con: duckdb.DuckDBPyConnection, view_name: str) -> pd.DataFrame:
+    query = f"""
+    WITH stats AS (
+        SELECT
+            distance_bin,
+            COUNT(*) FILTER (WHERE source='GT' AND status IN ('TP','FN')) AS gt_total,
+            COUNT(*) FILTER (WHERE source='GT' AND status='TP') AS tp_gt,
+            COUNT(*) FILTER (WHERE source='EST' AND status IN ('TP','FP')) AS est_total,
+            COUNT(*) FILTER (WHERE source='EST' AND status='FP') AS fp_est
+        FROM {view_name}
+        GROUP BY distance_bin
+    )
+    SELECT
+        distance_bin,
+        CASE WHEN gt_total > 0 THEN CAST(tp_gt AS DOUBLE) / gt_total ELSE 0 END AS tpr,
+        CASE WHEN est_total > 0 THEN CAST(fp_est AS DOUBLE) / est_total ELSE 0 END AS fpr
+    FROM stats
+    """
+    df = con.execute(query).df()
+    return _decorate_distance_bins(df)
+
+
+def _query_distance_rates_compare(
+    con: duckdb.DuckDBPyConnection,
+    views: Sequence[Tuple[str, str]],
+    *,
+    metric: str,
+) -> pd.DataFrame:
+    frames = []
+    for lbl, view_name in views:
+        query = f"""
+        WITH stats AS (
+            SELECT
+                distance_bin,
+                COUNT(*) FILTER (WHERE source='GT' AND status IN ('TP','FN')) AS gt_total,
+                COUNT(*) FILTER (WHERE source='GT' AND status='TP') AS tp_gt,
+                COUNT(*) FILTER (WHERE source='EST' AND status IN ('TP','FP')) AS est_total,
+                COUNT(*) FILTER (WHERE source='EST' AND status='FP') AS fp_est
+            FROM {view_name}
+            GROUP BY distance_bin
+        )
+        SELECT
+            distance_bin,
+            CASE
+                WHEN {'gt_total' if metric == 'tpr' else 'est_total'} > 0
+                THEN CAST({'tp_gt' if metric == 'tpr' else 'fp_est'} AS DOUBLE) / {'gt_total' if metric == 'tpr' else 'est_total'}
+                ELSE 0
+            END AS {metric}
+        FROM stats
+        """
+        df = con.execute(query).df()
+        if df.empty:
+            continue
+        df["run"] = lbl
+        frames.append(_decorate_distance_bins(df))
+    if not frames:
+        return pd.DataFrame()
+    return pd.concat(frames, ignore_index=True)
+
+
+def _query_object_counts_single(con: duckdb.DuckDBPyConnection, view_name: str) -> pd.DataFrame:
+    query = f"""
+    SELECT distance_bin, label, COUNT(*) AS n
+    FROM {view_name}
+    GROUP BY distance_bin, label
+    """
+    return _decorate_distance_bins(con.execute(query).df())
+
+
+def _query_object_counts_compare(con: duckdb.DuckDBPyConnection, views: Sequence[Tuple[str, str]]) -> pd.DataFrame:
+    frames = []
+    for lbl, view_name in views:
+        query = f"""
+        SELECT distance_bin, COUNT(*) AS n
+        FROM {view_name}
+        GROUP BY distance_bin
+        """
+        df = con.execute(query).df()
+        if df.empty:
+            continue
+        df["run"] = lbl
+        frames.append(_decorate_distance_bins(df))
+    if not frames:
+        return pd.DataFrame()
+    return pd.concat(frames, ignore_index=True)
+
+
+def _query_tpr_by_label(con: duckdb.DuckDBPyConnection, view_name: str, max_range: int) -> pd.DataFrame:
+    query = f"""
+    SELECT
+        label,
+        CASE
+            WHEN COUNT(*) FILTER (WHERE source='GT' AND status IN ('TP','FN')) > 0
+            THEN CAST(COUNT(*) FILTER (WHERE source='GT' AND status='TP') AS DOUBLE)
+                 / COUNT(*) FILTER (WHERE source='GT' AND status IN ('TP','FN'))
+            ELSE 0
+        END AS tpr
+    FROM {view_name}
+    WHERE dist_h < {int(max_range)}
+    GROUP BY label
+    ORDER BY label
+    """
+    return con.execute(query).df()
+
+
+def _query_mean_error_by_label(con: duckdb.DuckDBPyConnection, view_name: str, max_range: int) -> pd.DataFrame:
+    query = f"""
+    SELECT
+        label,
+        AVG(ABS(CAST(x_error AS DOUBLE))) FILTER (WHERE status = 'TP' AND x_error IS NOT NULL) AS mean_abs_x_error,
+        AVG(ABS(CAST(y_error AS DOUBLE))) FILTER (WHERE status = 'TP' AND y_error IS NOT NULL) AS mean_abs_y_error,
+        AVG(ABS(CAST(yaw_error AS DOUBLE))) FILTER (WHERE status = 'TP' AND yaw_error IS NOT NULL) AS mean_abs_yaw_error
+    FROM {view_name}
+    WHERE dist_h < {int(max_range)}
+    GROUP BY label
+    ORDER BY label
+    """
+    return con.execute(query).df()
+
+
+def _build_detection_perception_diff_figures(
+    con: duckdb.DuckDBPyConnection,
+    views: Sequence[Tuple[str, str]],
+) -> List[Tuple[go.Figure, str]]:
+    if len(views) < 2:
+        return []
+    figures: List[Tuple[go.Figure, str]] = []
+    base_view = views[0][1]
+    for lbl, comp_view in views[1:]:
+        df_obj = _query_perception_diff_objects(con, base_view, comp_view)
+        if df_obj.empty:
+            continue
+        h_imp = _baobab_hierarchy_from_objects(df_obj, "improved", f"Improved ({lbl} vs A)", 15, 10)
+        h_deg = _baobab_hierarchy_from_objects(df_obj, "degraded", f"Degraded ({lbl} vs A)", 15, 10)
+        if not h_imp.empty and "n" in h_imp.columns:
+            fig_imp = px.sunburst(
+                h_imp,
+                path=["root", "scen_g", "fr_display", "label"],
+                values="n",
+                color="n",
+                color_continuous_scale=[[0.0, "#f7fcf5"], [1.0, "#1a9850"]],
+                title=f"Sunburst: improved (n = {int(h_imp['n'].sum())} GT objects)",
+            )
+            _apply_detection_theme(fig_imp, f"Sunburst: improved ({lbl} vs A)")
+            figures.append((fig_imp, f"Perception diff sunburst for improved objects: {lbl} vs baseline A."))
+        if not h_deg.empty and "n" in h_deg.columns:
+            fig_deg = px.sunburst(
+                h_deg,
+                path=["root", "scen_g", "fr_display", "label"],
+                values="n",
+                color="n",
+                color_continuous_scale=[[0.0, "#fff5f0"], [1.0, "#d73027"]],
+                title=f"Sunburst: degraded (n = {int(h_deg['n'].sum())} GT objects)",
+            )
+            _apply_detection_theme(fig_deg, f"Sunburst: degraded ({lbl} vs A)")
+            figures.append((fig_deg, f"Perception diff sunburst for degraded objects: {lbl} vs baseline A."))
+
+        df_by_label, scen_agg, df_frame_sorted = _query_perception_diff_lens_tables(con, base_view, comp_view)
+        root_lens = f"{lbl} vs A"
+        if not df_by_label.empty:
+            tdf_l = _comparison_lens_treemap_df(
+                df_by_label["label"],
+                df_by_label["improved_cnt"],
+                df_by_label["degraded_cnt"],
+                root_lens,
+            )
+            fig_l = _comparison_lens_treemap_figure(tdf_l, "By class")
+            if fig_l is not None:
+                figures.append((fig_l, f"Perception diff comparison lens by class: {lbl} vs baseline A."))
+        if not scen_agg.empty:
+            tdf_s = _comparison_lens_treemap_df(
+                scen_agg["scenario_name"].astype(str),
+                scen_agg["improved_cnt"],
+                scen_agg["degraded_cnt"],
+                root_lens,
+            )
+            fig_s = _comparison_lens_treemap_figure(tdf_s, "By scenario")
+            if fig_s is not None:
+                figures.append((fig_s, f"Perception diff comparison lens by scenario: {lbl} vs baseline A."))
+        if not df_frame_sorted.empty:
+            fr_cap = 36
+            fr_top = df_frame_sorted.head(fr_cap).copy()
+            nms = (fr_top["scenario_name"].astype(str).str.slice(0, 26) + "\n· f" + fr_top["frame_index"].astype(str)).tolist()
+            ims = fr_top["improved_cnt"].astype(float).tolist()
+            dgs = fr_top["degraded_cnt"].astype(float).tolist()
+            rest = df_frame_sorted.iloc[fr_cap:]
+            if not rest.empty:
+                io = float(rest["improved_cnt"].sum())
+                do = float(rest["degraded_cnt"].sum())
+                if io > 0 or do > 0:
+                    nms.append(f"Other frames\n({len(rest)} frames)")
+                    ims.append(io)
+                    dgs.append(do)
+            tdf_f = _comparison_lens_treemap_df(pd.Series(nms), pd.Series(ims), pd.Series(dgs), root_lens)
+            fig_f = _comparison_lens_treemap_figure(tdf_f, "By frame")
+            if fig_f is not None:
+                figures.append((fig_f, f"Perception diff comparison lens by frame: {lbl} vs baseline A."))
+    return figures
+
+
+def _query_perception_diff_objects(
+    con: duckdb.DuckDBPyConnection,
+    base_view: str,
+    comp_view: str,
+) -> pd.DataFrame:
+    query = f"""
+    WITH base_gt AS (
+        SELECT
+            t4dataset_id,
+            frame_index,
+            uuid AS gt_uuid,
+            COUNT(*) FILTER (WHERE status = 'TP') > 0 AS tp_base,
+            COALESCE(MAX(try_cast(suite_name AS VARCHAR)), '') AS suite_name,
+            COALESCE(MAX(try_cast(scenario_name AS VARCHAR)), '') AS scenario_name,
+            COALESCE(MAX(try_cast(t4dataset_name AS VARCHAR)), '') AS t4dataset_name
+        FROM {base_view}
+        WHERE source = 'GT' AND uuid IS NOT NULL AND frame_index IS NOT NULL
+        GROUP BY 1, 2, 3
+    ),
+    comp_gt AS (
+        SELECT
+            t4dataset_id,
+            frame_index,
+            uuid AS gt_uuid,
+            COUNT(*) FILTER (WHERE status = 'TP') > 0 AS tp_comp,
+            COALESCE(MAX(try_cast(suite_name AS VARCHAR)), '') AS suite_name,
+            COALESCE(MAX(try_cast(scenario_name AS VARCHAR)), '') AS scenario_name,
+            COALESCE(MAX(try_cast(t4dataset_name AS VARCHAR)), '') AS t4dataset_name
+        FROM {comp_view}
+        WHERE source = 'GT' AND uuid IS NOT NULL AND frame_index IS NOT NULL
+        GROUP BY 1, 2, 3
+    ),
+    joined AS (
+        SELECT
+            COALESCE(CAST(b.t4dataset_id AS VARCHAR), CAST(c.t4dataset_id AS VARCHAR)) AS t4dataset_id,
+            COALESCE(CAST(b.frame_index AS VARCHAR), CAST(c.frame_index AS VARCHAR)) AS frame_index,
+            COALESCE(b.gt_uuid, c.gt_uuid) AS gt_uuid,
+            COALESCE(b.tp_base, FALSE) AS tp_base,
+            COALESCE(c.tp_comp, FALSE) AS tp_comp,
+            COALESCE(b.suite_name, c.suite_name, '') AS suite_name,
+            COALESCE(b.scenario_name, c.scenario_name, '') AS scenario_name,
+            COALESCE(b.t4dataset_name, c.t4dataset_name, '') AS t4dataset_name
+        FROM base_gt b
+        FULL OUTER JOIN comp_gt c
+            ON b.t4dataset_id = c.t4dataset_id
+           AND b.frame_index = c.frame_index
+           AND b.gt_uuid = c.gt_uuid
+    ),
+    obj_attrs AS (
+        SELECT
+            t4dataset_id,
+            frame_index,
+            uuid,
+            MAX(CAST(label AS VARCHAR)) AS label,
+            MAX(dist_h) AS dist_h
+        FROM {base_view}
+        WHERE source = 'GT'
+        GROUP BY 1, 2, 3
+    )
+    SELECT
+        j.t4dataset_id,
+        j.frame_index,
+        j.gt_uuid,
+        COALESCE(e.label, '') AS label,
+        COALESCE(e.dist_h, 0.0) AS dist_h,
+        {_DISTANCE_BIN_CASE.replace("dist_h", "COALESCE(e.dist_h, 0.0)")} AS distance_bin,
+        j.suite_name,
+        j.scenario_name,
+        j.t4dataset_name,
+        CASE
+            WHEN NOT j.tp_base AND j.tp_comp THEN 'improved'
+            WHEN j.tp_base AND NOT j.tp_comp THEN 'degraded'
+            WHEN j.tp_base AND j.tp_comp THEN 'both_tp'
+            ELSE 'both_fn'
+        END AS change_type,
+        j.tp_base,
+        j.tp_comp
+    FROM joined j
+    LEFT JOIN obj_attrs e
+        ON CAST(j.t4dataset_id AS VARCHAR) = CAST(e.t4dataset_id AS VARCHAR)
+       AND j.frame_index = CAST(e.frame_index AS VARCHAR)
+       AND j.gt_uuid = e.uuid
+    ORDER BY change_type, j.t4dataset_id, j.frame_index
+    """
+    try:
+        return con.execute(query).df()
+    except Exception:
+        return pd.DataFrame()
+
+
+def _query_perception_diff_lens_tables(  # Count improved/degraded GT objects between two views, grouped by label, scenario and frame.
+    con: duckdb.DuckDBPyConnection,
+    base_view: str,  # view name interpolated into the SQL as the baseline side (tp_base)
+    comp_view: str,  # view name interpolated into the SQL as the comparison side (tp_comp)
+) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:  # returned in the order (by label, by scenario, by frame)
+    query_label = f"""
+    WITH base_gt AS (
+        SELECT t4dataset_id, frame_index, uuid AS gt_uuid, COALESCE(MAX(try_cast(label AS VARCHAR)), '') AS label,
+               COUNT(*) FILTER (WHERE status = 'TP') > 0 AS tp_base
+        FROM {base_view}
+        WHERE source = 'GT' AND uuid IS NOT NULL AND frame_index IS NOT NULL
+        GROUP BY 1, 2, 3
+    ),
+    comp_gt AS (
+        SELECT t4dataset_id, frame_index, uuid AS gt_uuid, COALESCE(MAX(try_cast(label AS VARCHAR)), '') AS label,
+               COUNT(*) FILTER (WHERE status = 'TP') > 0 AS tp_comp
+        FROM {comp_view}
+        WHERE source = 'GT' AND uuid IS NOT NULL AND frame_index IS NOT NULL
+        GROUP BY 1, 2, 3
+    ),
+    joined AS (
+        SELECT COALESCE(b.label, c.label) AS label, COALESCE(b.tp_base, FALSE) AS tp_base, COALESCE(c.tp_comp, FALSE) AS tp_comp
+        FROM base_gt b FULL OUTER JOIN comp_gt c
+          ON b.t4dataset_id = c.t4dataset_id AND b.frame_index = c.frame_index AND b.gt_uuid = c.gt_uuid
+    )
+    SELECT label,
+           CAST(COUNT(*) FILTER (WHERE NOT tp_base AND tp_comp) AS DOUBLE) AS improved_cnt,
+           CAST(COUNT(*) FILTER (WHERE tp_base AND NOT tp_comp) AS DOUBLE) AS degraded_cnt
+    FROM joined
+    GROUP BY label
+    """  # per label: improved = TP only in comp, degraded = TP only in base; a GT object missing on one side counts as not-TP there
+    query_frame = f"""
+    WITH base_gt AS (
+        SELECT t4dataset_id, frame_index, uuid AS gt_uuid, COUNT(*) FILTER (WHERE status = 'TP') > 0 AS tp_base,
+               COALESCE(MAX(try_cast(scenario_name AS VARCHAR)), '') AS scenario_name
+        FROM {base_view}
+        WHERE source = 'GT' AND uuid IS NOT NULL AND frame_index IS NOT NULL
+        GROUP BY 1,2,3
+    ),
+    comp_gt AS (
+        SELECT t4dataset_id, frame_index, uuid AS gt_uuid, COUNT(*) FILTER (WHERE status = 'TP') > 0 AS tp_comp,
+               COALESCE(MAX(try_cast(scenario_name AS VARCHAR)), '') AS scenario_name
+        FROM {comp_view}
+        WHERE source = 'GT' AND uuid IS NOT NULL AND frame_index IS NOT NULL
+        GROUP BY 1,2,3
+    ),
+    joined AS (
+        SELECT COALESCE(CAST(b.t4dataset_id AS VARCHAR), CAST(c.t4dataset_id AS VARCHAR)) AS t4dataset_id,
+               COALESCE(CAST(b.frame_index AS VARCHAR), CAST(c.frame_index AS VARCHAR)) AS frame_index,
+               COALESCE(b.tp_base, FALSE) AS tp_base,
+               COALESCE(c.tp_comp, FALSE) AS tp_comp,
+               COALESCE(b.scenario_name, c.scenario_name, '') AS scenario_name
+        FROM base_gt b FULL OUTER JOIN comp_gt c
+          ON b.t4dataset_id = c.t4dataset_id AND b.frame_index = c.frame_index AND b.gt_uuid = c.gt_uuid
+    )
+    SELECT t4dataset_id, frame_index, scenario_name,
+           CAST(COUNT(*) FILTER (WHERE NOT tp_base AND tp_comp) AS DOUBLE) AS improved_cnt,
+           CAST(COUNT(*) FILTER (WHERE tp_base AND NOT tp_comp) AS DOUBLE) AS degraded_cnt
+    FROM joined
+    GROUP BY t4dataset_id, frame_index, scenario_name
+    ORDER BY degraded_cnt DESC, improved_cnt DESC
+    """  # same counts per (t4dataset_id, frame_index, scenario_name), most degraded frames first
+    query_scenario = f"""
+    WITH base_gt AS (
+        SELECT t4dataset_id, frame_index, uuid AS gt_uuid, COUNT(*) FILTER (WHERE status = 'TP') > 0 AS tp_base,
+               COALESCE(MAX(try_cast(scenario_name AS VARCHAR)), '') AS scenario_name
+        FROM {base_view}
+        WHERE source = 'GT' AND uuid IS NOT NULL AND frame_index IS NOT NULL
+        GROUP BY 1,2,3
+    ),
+    comp_gt AS (
+        SELECT t4dataset_id, frame_index, uuid AS gt_uuid, COUNT(*) FILTER (WHERE status = 'TP') > 0 AS tp_comp,
+               COALESCE(MAX(try_cast(scenario_name AS VARCHAR)), '') AS scenario_name
+        FROM {comp_view}
+        WHERE source = 'GT' AND uuid IS NOT NULL AND frame_index IS NOT NULL
+        GROUP BY 1,2,3
+    ),
+    joined AS (
+        SELECT COALESCE(b.scenario_name, c.scenario_name, '') AS scenario_name,
+               COALESCE(b.tp_base, FALSE) AS tp_base,
+               COALESCE(c.tp_comp, FALSE) AS tp_comp
+        FROM base_gt b FULL OUTER JOIN comp_gt c
+          ON b.t4dataset_id = c.t4dataset_id AND b.frame_index = c.frame_index AND b.gt_uuid = c.gt_uuid
+    )
+    SELECT scenario_name,
+           CAST(COUNT(*) FILTER (WHERE NOT tp_base AND tp_comp) AS DOUBLE) AS improved_cnt,
+           CAST(COUNT(*) FILTER (WHERE tp_base AND NOT tp_comp) AS DOUBLE) AS degraded_cnt
+    FROM joined
+    GROUP BY scenario_name
+    ORDER BY degraded_cnt DESC, improved_cnt DESC
+    """  # same counts per scenario_name, most degraded scenarios first
+    try:  # each query runs independently; a failing one yields an empty DataFrame instead of raising
+        df_label = con.execute(query_label).df()
+    except Exception:
+        df_label = pd.DataFrame()
+    try:
+        df_scenario = con.execute(query_scenario).df()
+    except Exception:
+        df_scenario = pd.DataFrame()
+    try:
+        df_frame = con.execute(query_frame).df()
+    except Exception:
+        df_frame = pd.DataFrame()
+    return df_label, df_scenario, df_frame  # note: scenario comes before frame, unlike the definition order above
+
+
+def _baobab_hierarchy_from_objects(  # Aggregate object rows of one change_type into root > scenario > frame > label counts.
+    df_obj: pd.DataFrame,  # read columns: change_type, scenario_name, label, t4dataset_id, frame_index
+    change_type: str,  # only rows whose "change_type" equals this value are kept
+    root_label: str,  # constant written to the "root" column of the result
+    max_scenarios: int,  # scenarios outside the top N by object count are merged into "Other scenarios"
+    max_frames: int,  # within each scenario group, frames outside the top N are merged into "Other frames"
+) -> pd.DataFrame:  # columns: scen_g, fr_g, label, n, root, fr_display; empty frame when nothing matches
+    if df_obj.empty or "change_type" not in df_obj.columns:
+        return pd.DataFrame()
+    sub = df_obj[df_obj["change_type"] == change_type].copy()
+    if sub.empty:
+        return pd.DataFrame()
+    sub["scenario_name"] = sub["scenario_name"].fillna("").astype(str).replace("", "(no scenario)")  # missing/blank -> visible placeholder
+    sub["label"] = sub["label"].fillna("").astype(str).replace("", "(no label)")
+    sub["frame_key"] = sub["t4dataset_id"].astype(str) + "|f" + sub["frame_index"].astype(str)  # frame identity is dataset id plus frame index
+    leaf = sub.groupby(["scenario_name", "frame_key", "label"], dropna=False).size().reset_index(name="n")  # n = object count per leaf
+    scen_tot = leaf.groupby("scenario_name")["n"].sum().sort_values(ascending=False)
+    top_scen = set(scen_tot.head(max_scenarios).index.tolist())
+    leaf["scen_g"] = leaf["scenario_name"].where(leaf["scenario_name"].isin(top_scen), "Other scenarios")
+    out_parts = []
+    for _, g in leaf.groupby("scen_g"):  # rank frames separately inside every scenario group
+        fr_tot = g.groupby("frame_key")["n"].sum().sort_values(ascending=False)
+        top_fr = set(fr_tot.head(max_frames).index.tolist())
+        g2 = g.copy()
+        g2["fr_g"] = g2["frame_key"].where(g2["frame_key"].isin(top_fr), "Other frames")
+        agg = g2.groupby(["scen_g", "fr_g", "label"], as_index=False)["n"].sum()  # re-sum after collapsing into the "Other" buckets
+        out_parts.append(agg)
+    out = pd.concat(out_parts, ignore_index=True)  # out_parts is non-empty here because sub (and so leaf) has rows
+    out["root"] = root_label
+    out["fr_display"] = out["fr_g"].astype(str)
+    return out
+
+
+def _comparison_lens_treemap_df(names: pd.Series, improved: pd.Series, degraded: pd.Series, root_label: str) -> pd.DataFrame:  # Long-form (root, side, item, n) rows for a treemap.
+    rows = []
+    for name, imp, deg in zip(names.astype(str), improved.astype(float), degraded.astype(float)):  # the three series are paired positionally
+        if imp > 0:  # zero (and NaN, which compares False) counts produce no row
+            rows.append({"root": root_label, "side": "Improved", "item": name, "n": float(imp)})
+        if deg > 0:
+            rows.append({"root": root_label, "side": "Degraded", "item": name, "n": float(deg)})
+    if not rows:
+        return pd.DataFrame(columns=["root", "side", "item", "n"])  # keep the expected columns even when there is no data
+    return pd.DataFrame(rows)
+
+
+def _comparison_lens_treemap_figure(tdf: pd.DataFrame, title: str) -> Optional[go.Figure]:  # Treemap of root > side > item sized by "n"; None when there is nothing to draw.
+    if tdf.empty or "n" not in tdf.columns:
+        return None
+    fig = px.treemap(
+        tdf,
+        path=["root", "side", "item"],  # hierarchy levels, outermost first
+        values="n",
+        color="side",
+        color_discrete_map={"Improved": "#1a9850", "Degraded": "#d73027"},  # green for improved, red for degraded
+    )
+    fig.update_traces(
+        textfont_size=12,
+        textinfo="label+value+percent parent",
+        hovertemplate=("<b>%{label}</b><br>GT objects: %{value:.0f}<br>% of parent: %{percentParent}<extra></extra>"),
+        marker_line_width=1.5,
+        marker_line_color="rgba(255,255,255,0.45)",
+        root_color="rgba(240,240,245,0.95)",
+    )
+    _apply_detection_theme(fig, title)
+    fig.update_layout(height=430, margin=dict(t=20, l=2, r=2, b=2), paper_bgcolor="rgba(0,0,0,0)")  # overrides the theme's height and margins
+    return fig
+
+
+def _views_have_error_columns(con: duckdb.DuckDBPyConnection, view_names: Sequence[str]) -> bool:  # True when the first view has x_error, y_error and yaw_error columns.
+    if not view_names:
+        return False
+    sample_df = con.execute(f"SELECT * FROM {view_names[0]} LIMIT 1").df()  # NOTE(review): only view_names[0] is inspected; confirm the other views share its schema
+    return all(col in sample_df.columns for col in ["x_error", "y_error", "yaw_error"])
+
+
+def _decorate_distance_bins(df: pd.DataFrame) -> pd.DataFrame:  # Add bin_order/bin_label columns and sort rows by bin order.
+    if df.empty or "distance_bin" not in df.columns:
+        return df  # returned as-is (not copied) when there is nothing to decorate
+    df = df.copy()  # work on a copy so the caller's frame is not mutated
+    df["bin_order"] = df["distance_bin"].map(_distance_bin_sort_key)
+    df["bin_label"] = df["distance_bin"]
+    return df.sort_values("bin_order")
+
+
+def _distance_bin_sort_key(label: str) -> int:  # Position of the label in _distance_bin_order(); unknown labels sort after every known bin.
+    try:
+        return _distance_bin_order().index(str(label))
+    except ValueError:  # raised by list.index when the label is not a known bin
+        return len(_distance_bin_order()) + 1
+
+
+def _compare_color(index: int) -> str:  # Pick the colour for the index-th compared run.
+    return _COMPARE_RUN_COLORS[index % len(_COMPARE_RUN_COLORS)]  # modulo wraps around when there are more runs than colours
+
+
+def _tpr_spider_compare_figure(  # Radar chart with one closed polygon per run, plotting "tpr" per category on a fixed 0..1 axis.
+    df_all: pd.DataFrame,  # read columns: run, label, tpr
+    categories: List[str],  # angular axis entries, matched against the "label" column
+    title: str,
+    run_order: List[str],  # values of the "run" column, in trace/colour order
+    *,
+    height: int = 440,
+) -> go.Figure:
+    fig = go.Figure()
+    for i, run_lbl in enumerate(run_order):
+        sub = df_all[df_all["run"] == run_lbl].drop_duplicates("label").set_index("label")  # first row per label wins
+        r_vals = [float(sub.loc[c, "tpr"]) if c in sub.index else 0.0 for c in categories]  # categories absent for this run plot as 0
+        r_closed = r_vals + r_vals[:1]  # repeat the first point so the polygon closes
+        theta = categories + categories[:1]
+        c = _compare_color(i)
+        fig.add_trace(
+            go.Scatterpolar(
+                r=r_closed,
+                theta=theta,
+                name=str(run_lbl),
+                line=dict(color=c, width=2),
+                fillcolor=f"rgba({int(c[1:3],16)},{int(c[3:5],16)},{int(c[5:7],16)},0.12)",  # assumes c is a "#rrggbb" hex string — TODO confirm for _COMPARE_RUN_COLORS
+                fill="toself",
+                hovertemplate="%{theta}<br>TP rate: %{r:.2%}<extra></extra>",
+            )
+        )
+    _apply_detection_theme(fig, title)
+    fig.update_layout(
+        height=height,
+        polar=dict(
+            radialaxis=dict(visible=True, range=[0, 1], tickformat=".0%", gridcolor="rgba(0,0,0,0.08)"),
+            angularaxis=dict(tickfont=dict(size=10)),
+        ),
+        legend=dict(orientation="h", yanchor="bottom", y=-0.12, xanchor="center", x=0.5),  # horizontal legend placed below the plot
+    )
+    return fig
+
+
+def _scalar_metric_spider_compare_figure(  # Radar chart with one closed polygon per run for an arbitrary numeric column.
+    df_all: pd.DataFrame,  # read columns: run, label and value_col
+    categories: List[str],  # angular axis entries, matched against the "label" column
+    title: str,
+    run_order: List[str],  # values of the "run" column, in trace/colour order
+    value_col: str,  # column plotted on the radial axis
+    hover_metric: str,  # metric name shown in the hover text
+    *,
+    height: int = 380,
+    tickformat: str = ".3f",  # used for both the radial ticks and the hover value
+) -> go.Figure:
+    fig = go.Figure()
+    max_r = 0.0
+    traces_r: List[List[float]] = []
+    for run_lbl in run_order:  # first pass: collect values so the radial range can cover every run
+        sub = df_all[df_all["run"] == run_lbl].drop_duplicates("label").set_index("label")
+        r_vals = [float(sub.loc[c, value_col]) if c in sub.index and pd.notna(sub.loc[c, value_col]) else 0.0 for c in categories]  # missing or NaN -> 0
+        traces_r.append(r_vals)
+        if r_vals:
+            max_r = max(max_r, max(r_vals))
+    r_max = max(max_r * 1.08, 1.0)  # 8% headroom above the largest value, never below 1.0
+    for i, run_lbl in enumerate(run_order):  # second pass: draw the traces
+        r_vals = traces_r[i]
+        r_closed = r_vals + r_vals[:1]  # repeat the first point so the polygon closes
+        theta = categories + categories[:1]
+        c = _compare_color(i)
+        fig.add_trace(
+            go.Scatterpolar(
+                r=r_closed,
+                theta=theta,
+                name=str(run_lbl),
+                line=dict(color=c, width=2),
+                fillcolor=f"rgba({int(c[1:3],16)},{int(c[3:5],16)},{int(c[5:7],16)},0.12)",  # assumes c is a "#rrggbb" hex string — TODO confirm for _COMPARE_RUN_COLORS
+                fill="toself",
+                hovertemplate="%{theta}<br>" + hover_metric + ": %{r:" + tickformat + "}<extra></extra>",
+            )
+        )
+    _apply_detection_theme(fig, title)
+    fig.update_layout(
+        height=height,
+        polar=dict(
+            radialaxis=dict(visible=True, range=[0, r_max], tickformat=tickformat, gridcolor="rgba(0,0,0,0.08)"),
+            angularaxis=dict(tickfont=dict(size=9)),
+        ),
+        legend=dict(orientation="h", yanchor="bottom", y=-0.18, xanchor="center", x=0.5),  # horizontal legend placed below the plot
+    )
+    return fig
+
+
+def _make_text_placeholder_figure(text: str) -> go.Figure:  # Axis-less figure that shows only a centred text message.
+    fig = go.Figure()
+    fig.add_annotation(text=text, x=0.5, y=0.5, xref="paper", yref="paper", showarrow=False, font=dict(size=16, color="#475569"))  # paper coordinates: (0.5, 0.5) is the figure centre
+    fig.update_xaxes(visible=False)
+    fig.update_yaxes(visible=False)
+    fig.update_layout(
+        height=240,
+        template="plotly_white",
+        margin=dict(t=20, b=20, l=20, r=20),
+        paper_bgcolor="rgba(248,250,252,0.9)",
+        plot_bgcolor="rgba(255,255,255,0.95)",
+    )
+    return fig
+
+
+def _build_score_view(df_raw: pd.DataFrame, criteria_idx: int) -> pd.DataFrame:  # Module-private alias that forwards unchanged to build_score_view.
+    return build_score_view(df_raw, criteria_idx)
+
+
+def _create_eval_flat_view(con: duckdb.DuckDBPyConnection, parquet_path: str, view_name: str) -> None:  # (Re)create view_name over the parquet file, adding dist_h and distance_bin columns.
+    safe_path = str(parquet_path).replace("'", "''")  # double single quotes so a path containing ' cannot terminate the SQL string literal
+    con.execute(f"""
+    CREATE OR REPLACE VIEW {view_name} AS
+    WITH src AS (
+        SELECT * FROM parquet_scan('{safe_path}')
+        UNION BY NAME
+        SELECT CAST(NULL AS VARCHAR) AS visibility,
+               CAST(NULL AS VARCHAR) AS suite_name,
+               CAST(NULL AS VARCHAR) AS scenario_name,
+               CAST(NULL AS VARCHAR) AS t4dataset_name
+        WHERE FALSE
+    ),
+    base AS (
+        SELECT
+            * REPLACE (coalesce(CAST(visibility AS VARCHAR), 'not available') AS visibility),
+            sqrt(CAST(x AS DOUBLE)*CAST(x AS DOUBLE) + CAST(y AS DOUBLE)*CAST(y AS DOUBLE)) AS dist_h
+        FROM src
+        WHERE x IS NOT NULL AND y IS NOT NULL
+    )
+    SELECT
+        *,
+        {_DISTANCE_BIN_CASE} AS distance_bin
+    FROM base
+    """)  # the empty UNION BY NAME branch guarantees the four optional columns exist even if the parquet file lacks them
+
+
+def _kpi_row_for_view(con: duckdb.DuckDBPyConnection, view_name: str) -> Optional[dict]:  # TP/FP/FN counts plus TP rate, precision and F1 for rows with dist_h < 50.
+    query = f"""
+    SELECT
+        COUNT(*) FILTER (WHERE source = 'GT' AND status = 'TP') AS tp_gt,
+        COUNT(*) FILTER (WHERE source = 'GT' AND status = 'FN') AS fn,
+        COUNT(*) FILTER (WHERE source = 'EST' AND status = 'TP') AS tp_est,
+        COUNT(*) FILTER (WHERE source = 'EST' AND status = 'FP') AS fp
+    FROM {view_name}
+    WHERE dist_h < 50
+    """
+    row = con.execute(query).fetchone()
+    if not row:
+        return None
+    tp_gt, fn, tp_est, fp = [int(x or 0) for x in row]  # NULL counts become 0
+    gt_total = tp_gt + fn
+    est_total = tp_est + fp
+    tpr = (tp_gt / gt_total) if gt_total > 0 else None  # GT-side ratio: TP / (TP + FN)
+    precision = (tp_est / est_total) if est_total > 0 else None  # EST-side ratio: TP / (TP + FP)
+    recall = tpr  # recall is the same quantity as the TP rate
+    if precision is not None and recall is not None and (precision + recall) > 0:
+        f1 = 2 * precision * recall / (precision + recall)
+    else:
+        f1 = None  # undefined when either ratio is missing or both are zero
+    return {
+        "tp": tp_gt,  # the reported TP is the GT-side count; tp_est only feeds precision
+        "fp": fp,
+        "fn": fn,
+        "tpr": tpr,
+        "precision": precision,
+        "f1": f1,
+    }
+
+
+def _query_status_counts(con: duckdb.DuckDBPyConnection, views: Sequence[Tuple[str, str]]) -> pd.DataFrame:
+    """Return (run, label, status, num) counts per (run label, view name) pair for rows with dist_h < 50; empty frame if no views."""
+    parts = [  # a run label may contain a single quote, so double it before embedding the label as a SQL string literal
+        "SELECT '{}' AS run, label, status, COUNT(*) AS num ".format(str(lbl).replace("'", "''"))
+        + f"FROM {view_name} WHERE dist_h < 50 GROUP BY label, status"
+        for lbl, view_name in views
+    ]
+    if not parts:
+        return pd.DataFrame()
+    return con.execute(" UNION ALL ".join(parts) + " ORDER BY run, label, status").df()
+
+
+def _query_distance_tpr(con: duckdb.DuckDBPyConnection, views: Sequence[Tuple[str, str]]) -> pd.DataFrame:
+    """Return (run, distance_bin, tpr) rows per (run label, view name) pair for dist_h < 150; empty frame if no views."""
+    frames = []
+    for lbl, view_name in views:
+        run_literal = str(lbl).replace("'", "''")  # double single quotes so a label containing ' cannot break the SQL string literal
+        query = f"""
+        WITH stats AS (
+            SELECT
+                distance_bin,
+                COUNT(*) FILTER (WHERE source='GT' AND status IN ('TP','FN')) AS gt_total,
+                COUNT(*) FILTER (WHERE source='GT' AND status='TP') AS tp_gt
+            FROM {view_name}
+            WHERE dist_h < 150
+            GROUP BY distance_bin
+        )
+        SELECT
+            '{run_literal}' AS run,
+            distance_bin,
+            CASE WHEN gt_total > 0 THEN CAST(tp_gt AS DOUBLE) / gt_total ELSE 0 END AS tpr
+        FROM stats
+        """
+        frames.append(con.execute(query).df())
+    return pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()  # pd.concat raises on an empty list, hence the guard
+
+
+def _apply_criteria_theme(fig: go.Figure, title: str) -> None:  # Apply the shared title/font/background/legend/grid styling to fig in place.
+    fig.update_layout(
+        template="plotly_white",
+        title=dict(text=title, font=dict(size=16, color="#0f172a"), x=0, xanchor="left", pad=dict(t=8, b=12)),  # left-aligned title
+        font=dict(family="system-ui, -apple-system, 'Segoe UI', sans-serif", size=12, color="#334155"),
+        paper_bgcolor="rgba(248, 250, 252, 0.92)",
+        plot_bgcolor="rgba(255, 255, 255, 0.95)",
+        margin=dict(l=56, r=28, t=72, b=52),
+        height=420,
+        hoverlabel=dict(bgcolor="white", font_size=13, font_family="system-ui"),
+        legend=dict(  # horizontal legend anchored above the plot, right-aligned
+            title_text="",
+            orientation="h",
+            yanchor="bottom",
+            y=1.02,
+            xanchor="right",
+            x=1,
+            bgcolor="rgba(255,255,255,0.7)",
+        ),
+    )
+    fig.update_xaxes(showgrid=True, gridcolor="rgba(148,163,184,0.25)", zeroline=False)
+    fig.update_yaxes(showgrid=True, gridcolor="rgba(148,163,184,0.25)", zeroline=False)
+
+
+def _apply_detection_theme(fig: go.Figure, title: str) -> None:  # Apply the detection-chart styling (transparent paper, compact fonts, visible zero lines) to fig in place.
+    fig.update_layout(
+        title=dict(text=title, font=dict(size=14, color="#1f2937")),
+        font=dict(family='"Inter", "Segoe UI", sans-serif', size=11),
+        paper_bgcolor="rgba(0,0,0,0)",  # fully transparent paper background
+        plot_bgcolor="rgba(248,250,252,0.6)",
+        margin=dict(t=48, b=40, l=52, r=24),
+        height=390,  # default height; callers in this file override it afterwards via update_layout
+        legend=dict(  # horizontal legend anchored above the plot, right-aligned
+            orientation="h",
+            yanchor="bottom",
+            y=1.02,
+            xanchor="right",
+            x=1,
+            font=dict(size=11),
+        ),
+    )
+    fig.update_xaxes(
+        tickfont=dict(size=11),
+        title_font=dict(size=12),
+        gridcolor="rgba(0,0,0,0.08)",
+        zeroline=True,
+        zerolinecolor="rgba(0,0,0,0.15)",
+    )
+    fig.update_yaxes(  # same axis styling as the x axis
+        tickfont=dict(size=11),
+        title_font=dict(size=12),
+        gridcolor="rgba(0,0,0,0.08)",
+        zeroline=True,
+        zerolinecolor="rgba(0,0,0,0.15)",
+    )
+
+
+def _plotly_figure_to_image(fig: go.Figure, content_width: float, image_reader_cls):  # Render fig to PNG and wrap it in a reportlab Image scaled to content_width.
+    from reportlab.platypus import Image  # imported locally, so importing this module does not need reportlab
+
+    png_bytes = fig.to_image(format="png", width=1400, height=800, scale=2)
+    image_buffer = io.BytesIO(png_bytes)
+    reader = image_reader_cls(image_buffer)  # used only to read the rendered pixel size
+    img_width, img_height = reader.getSize()
+    target_width = content_width
+    target_height = target_width * (img_height / img_width)  # keep the rendered aspect ratio
+    image_buffer.seek(0)  # rewind after the reader consumed the buffer so Image reads from the start
+    return Image(image_buffer, width=target_width, height=target_height)
+
+
+def _styled_table(rows: Any, content_width: float):  # Build a styled reportlab Table; rows is a list of rows or a dict with "rows" and optional "col_width_weights".
+    from reportlab.lib import colors
+    from reportlab.lib.styles import getSampleStyleSheet
+    from reportlab.platypus import Table, TableStyle
+
+    col_width_weights = None
+    if isinstance(rows, dict):  # dict form carries optional relative column widths next to the rows
+        col_width_weights = rows.get("col_width_weights")
+        rows = rows.get("rows", [])
+    if not rows:
+        rows = [["No data"]]  # placeholder so an empty table still renders
+    ncols = max(len(row) for row in rows)  # NOTE(review): rows made only of empty lists give ncols == 0 and a ZeroDivisionError below
+    styles = getSampleStyleSheet()
+    header_style = styles["BodyText"].clone("table_header")
+    header_style.fontName = "Helvetica-Bold"
+    header_style.fontSize = 8.5
+    header_style.leading = 10
+    body_style = styles["BodyText"].clone("table_body")
+    body_style.fontName = "Helvetica"
+    body_style.fontSize = 8.2
+    body_style.leading = 9.6
+    body_style.textColor = colors.HexColor("#0f172a")
+    normalized = []
+    for row_idx, row in enumerate(rows):
+        padded = list(row) + [""] * (ncols - len(row))  # pad short rows so every row has ncols cells
+        cell_style = header_style if row_idx == 0 else body_style  # the first row is treated as the header
+        normalized.append([
+            _table_paragraph(cell, cell_style)
+            for cell in padded
+        ])
+    if col_width_weights and len(col_width_weights) == ncols:  # weights are honoured only when they match the column count
+        total = sum(col_width_weights) or 1.0
+        col_widths = [content_width * (w / total) for w in col_width_weights]
+    else:
+        col_width = content_width / ncols  # otherwise split the width evenly
+        col_widths = [col_width] * ncols
+    table = Table(normalized, colWidths=col_widths, repeatRows=1)  # repeatRows=1 repeats the header row when the table splits across pages
+    table.setStyle(
+        TableStyle(
+            [
+                ("BACKGROUND", (0, 0), (-1, 0), colors.HexColor("#e2e8f0")),
+                ("TEXTCOLOR", (0, 0), (-1, 0), colors.HexColor("#0f172a")),
+                ("GRID", (0, 0), (-1, -1), 0.4, colors.HexColor("#cbd5e1")),
+                ("ROWBACKGROUNDS", (0, 1), (-1, -1), [colors.white, colors.HexColor("#f8fafc")]),  # alternating body row colours
+                ("VALIGN", (0, 0), (-1, -1), "TOP"),
+                ("LEFTPADDING", (0, 0), (-1, -1), 6),
+                ("RIGHTPADDING", (0, 0), (-1, -1), 6),
+                ("TOPPADDING", (0, 0), (-1, -1), 5),
+                ("BOTTOMPADDING", (0, 0), (-1, -1), 5),
+            ]
+        )
+    )
+    return table
+
+
+def _distance_bin_order() -> List[str]:  # Canonical display order of distance-bin labels; a new list is built on every call.
+    return [
+        "[0,10)",
+        "[10,20)",
+        "[20,30)",
+        "[30,40)",
+        "[40,50)",
+        "[50,60)",
+        "[60,70)",
+        "[70,80)",
+        "[80,90)",
+        "[90,100)",
+        "[100,110)",
+        "[110,120)",
+        "[120,130)",
+        "[130,140)",
+        "[140,150)",
+        "[150,inf)",  # open-ended last bin
+    ]
+
+
+def _ensure_reportlab_available() -> Optional[str]:  # Return None when reportlab can be imported, otherwise a user-facing error message.
+    try:
+        import reportlab  # noqa: F401
+    except ImportError:
+        return "PDF export requires the `reportlab` package to be installed."
+    return None
+
+
+def _slugify(value: str) -> str:  # Lowercase slug: non-alphanumerics become "_", runs collapse, edges are trimmed; "report" if nothing is left.
+    clean = "".join(ch.lower() if ch.isalnum() else "_" for ch in str(value))
+    while "__" in clean:  # collapse runs of underscores to a single one
+        clean = clean.replace("__", "_")
+    return clean.strip("_") or "report"
+
+
+def _fmt_number(value: Any) -> str:  # Format a scalar with two decimals; None/NaN/NA become "N/A".
+    if value is None or pd.isna(value):  # assumes value is a scalar; pd.isna on a list-like returns an array — TODO confirm callers
+        return "N/A"
+    return f"{float(value):.2f}"
+
+
+def _fmt_percent(value: Any) -> str:  # Format a fraction (e.g. 0.25) as a percentage with one decimal ("25.0%"); None/NaN/NA become "N/A".
+    if value is None or pd.isna(value):  # assumes value is a scalar; pd.isna on a list-like returns an array — TODO confirm callers
+        return "N/A"
+    return f"{100.0 * float(value):.1f}%"
+
+
+def _summarize_filter_values(values: Optional[Iterable[Any]], *, empty_label: str = "All") -> str:  # Comma-joined summary of filter values, truncated after six entries.
+    if values is None:
+        return empty_label
+    vals = [str(v) for v in values if str(v).strip() != ""]  # drop values whose text is blank
+    if not vals:
+        return empty_label
+    if len(vals) <= 6:
+        return ", ".join(vals)
+    return ", ".join(vals[:6]) + f", ... (+{len(vals) - 6} more)"  # show the first six and how many were omitted
+
+
+def _shorten_scenario_name(value: str, *, max_len: int = 52) -> str:  # Truncate to max_len characters, ending in "..." when shortened.
+    text = str(value)
+    if len(text) <= max_len:
+        return text
+    return text[: max_len - 3] + "..."  # reserves three characters for the ellipsis; NOTE(review): max_len < 3 makes the slice end negative
+
+
+def _table_paragraph(value: Any, style: Any):  # Wrap a cell value in a reportlab Paragraph with the given style.
+    from reportlab.platypus import Paragraph  # imported locally, so importing this module does not need reportlab
+
+    text = html.escape("" if value is None else str(value)).replace("\n", "<br/>")  # escape markup characters first, then turn newlines into <br/> tags
+    return Paragraph(text, style)
diff --git a/evaluation_dashboard_app/lib/overview_url_hydrate.py b/evaluation_dashboard_app/lib/overview_url_hydrate.py
new file mode 100644
index 0000000..8f9fc1a
--- /dev/null
+++ b/evaluation_dashboard_app/lib/overview_url_hydrate.py
@@ -0,0 +1,74 @@
+"""
+Rehydrate session_state from Overview URL query params when server-side session is empty.
+
+Overview syncs `mode`, `run_a`, `run_b`, ... via `st.query_params`. After a load-balancer hop to a
+different Streamlit replica, `st.session_state` may not contain `runA` even though the user already
+used Overview — the URL still encodes the selection. This module rebuilds `runA` / compare state
+from that URL so multipage analysis works without requiring Overview to run again on the same box.
+"""
+
+from __future__ import annotations
+
+import streamlit as st
+
+from lib.path_utils import get_data_root, get_run_display_name, get_run_storage_name, list_run_directories
+from lib.run_loader import load_run
+
+
+def try_hydrate_session_from_overview_query_params() -> bool:
+    """
+    If `runA` is missing but the URL has Overview-style params (`run_a`, optional `mode` / `run_b`…),
+    load runs and populate `session_state`. Returns True if `runA` is present afterward.
+    """
+    if "runA" in st.session_state:  # already hydrated: nothing to do
+        return True
+    params = st.query_params
+    run_a_name = params.get("run_a")
+    if not run_a_name:
+        return False
+    root = get_data_root()
+    if not root.exists() or not root.is_dir():
+        return False
+    run_dirs = list_run_directories()
+    name_to_dir = {get_run_display_name(p): p for p in run_dirs}  # URL values may be display names ...
+    name_to_dir.update({get_run_storage_name(p): p for p in run_dirs})  # ... or storage names; a storage name wins when both forms collide
+    if run_a_name not in name_to_dir:
+        return False
+    mode_param = (params.get("mode") or "single").lower()  # a missing mode is treated as single-run mode
+    try:
+        if mode_param == "compare":
+            url_compare = [
+                params.get(k)
+                for k in ("run_b", "run_c", "run_d", "run_e")  # up to four comparison runs
+                if params.get(k)
+            ]
+            valid = [n for n in url_compare if n in name_to_dir]  # unknown run names are dropped silently
+            if not valid:
+                return False  # compare mode with no resolvable comparison run: session state is left untouched
+            run_a_dir = name_to_dir[run_a_name]
+            compare_dirs = [name_to_dir[n] for n in valid]
+            all_dirs = [run_a_dir] + compare_dirs
+            run_labels = ["A"] + [chr(66 + i) for i in range(len(compare_dirs))]  # chr(66) is "B", so labels run A, B, C, ...
+            all_runs = [load_run(d) for d in all_dirs]
+            st.session_state.update(
+                {
+                    "mode": "Compare Mode",
+                    "runA": all_runs[0],
+                    "all_runs": all_runs,
+                    "run_labels": run_labels,
+                    "df_cmp": None,
+                }
+            )
+            if len(all_runs) >= 2:  # always true here: valid is non-empty, so all_runs holds run A plus at least one more
+                st.session_state["runB"] = all_runs[1]
+            else:
+                st.session_state["runB"] = None
+            return True
+        run_a = load_run(name_to_dir[run_a_name])  # any mode other than "compare" loads run A alone
+        st.session_state["runA"] = run_a
+        st.session_state["mode"] = "Single Mode"
+        for key in ("all_runs", "run_labels", "runB", "df_cmp"):  # remove any compare-mode keys from the session
+            st.session_state.pop(key, None)
+        return True
+    except Exception:  # any failure while loading or storing runs is reported as "not hydrated"
+        return False
diff --git a/evaluation_dashboard_app/lib/page_chrome.py b/evaluation_dashboard_app/lib/page_chrome.py
index 5bd6e08..d316661 100644
--- a/evaluation_dashboard_app/lib/page_chrome.py
+++ b/evaluation_dashboard_app/lib/page_chrome.py
@@ -76,7 +76,7 @@ def render_loaded_data_section(entries: Sequence[Tuple[str, str]]) -> None:
             f"""
             <div style="border-radius:14px;border-left:5px solid #1d4ed8;background:linear-gradient(90deg,#eff6ff 0%,#fff 100%);padding:0.95rem 1.1rem;">
               <div style="font-size:0.68rem;text-transform:uppercase;letter-spacing:0.1em;color:#64748b;font-weight:700;">{la}</div>
-              <div style="margin-top:0.35rem;font-family:ui-monospace,monospace;font-size:0.82rem;color:#0f172a;word-break:break-all;line-height:1.4;">{pa}</div>
+              <div style="margin-top:0.35rem;font-size:0.86rem;color:#0f172a;word-break:break-word;line-height:1.4;font-weight:650;">{pa}</div>
             </div>
             """,
             unsafe_allow_html=True,
@@ -95,7 +95,7 @@ def render_loaded_data_section(entries: Sequence[Tuple[str, str]]) -> None:
                 f"""
                 <div style="border-radius:14px;border-left:5px solid {acc};background:linear-gradient(90deg,#f8fafc 0%,#fff 100%);padding:0.95rem 1.1rem;min-height:4.5rem;">
                   <div style="font-size:0.68rem;text-transform:uppercase;letter-spacing:0.1em;color:#64748b;font-weight:700;">{la}</div>
-                  <div style="margin-top:0.35rem;font-family:ui-monospace,monospace;font-size:0.82rem;color:#0f172a;word-break:break-all;line-height:1.4;">{pa}</div>
+                  <div style="margin-top:0.35rem;font-size:0.86rem;color:#0f172a;word-break:break-word;line-height:1.4;font-weight:650;">{pa}</div>
                 </div>
                 """,
                 unsafe_allow_html=True,
diff --git a/evaluation_dashboard_app/lib/path_utils.py b/evaluation_dashboard_app/lib/path_utils.py
index ca698a4..5fa77a8 100644
--- a/evaluation_dashboard_app/lib/path_utils.py
+++ b/evaluation_dashboard_app/lib/path_utils.py
@@ -9,9 +9,12 @@
 """
 
 import os
+import re
 from pathlib import Path
 from typing import Optional, List, Tuple
 
+import yaml
+
 # Root for all evaluation data. Set EVAL_DASHBOARD_DATA_ROOT to override (e.g. /var/eval_dashboard/data).
 _DATA_ROOT: Optional[Path] = None
 
@@ -112,12 +115,122 @@ def resolve_under_data_root(
         return None, str(e)
 
 
+def _looks_like_analysis_run(path: Path) -> bool:  # True when the directory directly contains any known analysis output file.
+    return (
+        (path / "Summary.csv").exists()
+        or (path / "Score.csv").exists()
+        or any(path.glob("*.parquet"))  # non-recursive: only parquet files directly inside path count
+        or (path / "current.csv").exists()
+        or (path / "future.csv").exists()
+    )
+
+
+def _is_internal_trend_release_dir(path: Path) -> bool:  # True for directories named "trend_release_*", which list_run_directories skips.
+    return path.name.startswith("trend_release_")
+
+
+RELEASE_ROLE_DIRS = ("performance", "usecase", "devops")  # child directory names looked up inside a release container
+RELEASE_ROLE_LABELS = {  # role directory name -> label used in run display names
+    "performance": "Performance",
+    "usecase": "Usecase",
+    "devops": "DevOps",
+}
+_PILOT_AUTO_PREFIX_PATTERN = re.compile(r"^\s*Pilot\.Auto\s*", re.IGNORECASE)  # leading "Pilot.Auto" (any case) plus surrounding whitespace
+
+
+def _looks_like_release_container(path: Path) -> bool:  # True for a directory with metadata.yaml and a role subdirectory that is not itself an analysis run.
+    return (
+        (path / "metadata.yaml").exists()
+        and any((path / name).is_dir() for name in RELEASE_ROLE_DIRS)
+        and not _looks_like_analysis_run(path)  # a directory holding analysis files itself is treated as a run, not a container
+    )
+
+
+def _load_yaml_metadata(path: Path) -> dict:  # Best-effort YAML read; returns {} for a missing, unreadable, invalid or non-mapping file.
+    if not path.is_file():
+        return {}
+    try:
+        with path.open("r", encoding="utf-8") as fh:
+            data = yaml.safe_load(fh) or {}  # an empty document loads as None
+    except (OSError, yaml.YAMLError):
+        return {}
+    return data if isinstance(data, dict) else {}  # top-level lists or scalars are ignored
+
+
+def _compact_release_version(metadata: dict, fallback: str) -> str:  # Short version string from metadata ("version_abbr", then "pilot_auto_version"), else fallback.
+    version = str(metadata.get("version_abbr") or metadata.get("pilot_auto_version") or "").strip()
+    if not version:
+        return fallback
+    version = _PILOT_AUTO_PREFIX_PATTERN.sub("", version).strip() or version  # drop a leading "Pilot.Auto" unless that would leave nothing
+    version = version.replace("/", "-")  # slashes are replaced so the version cannot read like a path
+    return version
+
+
+def _release_run_display_name(run_path: Path) -> Optional[str]:  # "[REL] <version> | <role> | <date>" for release runs, None for ordinary runs.
+    role_label = ""
+    release_dir = run_path
+    if run_path.name in RELEASE_ROLE_LABELS and _looks_like_release_container(run_path.parent):  # role child inside a release container
+        release_dir = run_path.parent
+        role_label = RELEASE_ROLE_LABELS[run_path.name]
+    elif _looks_like_release_container(run_path):  # the release container itself
+        role_label = "Release"
+    else:
+        return None
+
+    metadata = _load_yaml_metadata(run_path / "metadata.yaml") or _load_yaml_metadata(release_dir / "metadata.yaml")  # the run's own metadata wins over the container's
+    version = _compact_release_version(metadata, release_dir.name.replace("release_spec_", ""))  # fallback: container directory name without "release_spec_"
+    date = str(metadata.get("date") or "").strip()
+    parts = [f"[REL] {version}"]
+    if role_label:
+        parts.append(role_label)
+    if date:
+        parts.append(date)  # the date segment is omitted when metadata has none
+    return " | ".join(parts)
+
+
+def get_run_display_name(run_path: Path) -> str:
+    """Return a stable user-facing run selector name."""
+    release_name = _release_run_display_name(run_path)  # release runs get a "[REL] ..." name
+    if release_name:
+        return release_name
+    root = get_data_root()
+    try:
+        return run_path.resolve().relative_to(root).as_posix()  # otherwise the POSIX path relative to the data root
+    except Exception:  # e.g. ValueError when the resolved path is not under root
+        return run_path.name
+
+
+def get_run_storage_name(run_path: Path) -> str:
+    """Return the raw path-like run name relative to the data root."""
+    root = get_data_root()
+    try:
+        return run_path.resolve().relative_to(root).as_posix()  # NOTE(review): compares a resolved path with root as returned; confirm get_data_root() is already resolved
+    except Exception:  # e.g. ValueError when the resolved path is not under root
+        return run_path.name  # fall back to the bare directory name
+
+
 def list_run_directories() -> List[Path]:
-    """Return sorted list of run directories (immediate subdirs of data root) that exist."""
+    """Return sorted run directories, including release analysis children."""
     root = get_data_root()
     if not root.exists():
         return []
-    return sorted([p for p in root.iterdir() if p.is_dir()])
+    runs: List[Path] = []
+    seen = set()  # resolved paths already listed, so the same directory is not added twice
+    for child in sorted([p for p in root.iterdir() if p.is_dir()]):
+        if _is_internal_trend_release_dir(child):
+            continue  # "trend_release_*" directories are never offered as runs
+        resolved = child.resolve()
+        if resolved not in seen and not _looks_like_release_container(child):  # release containers are represented only by their role children
+            runs.append(child)
+            seen.add(resolved)
+        for release_child_name in RELEASE_ROLE_DIRS:  # role children are checked under every top-level directory
+            release_child = child / release_child_name
+            if release_child.is_dir() and _looks_like_analysis_run(release_child):
+                release_resolved = release_child.resolve()
+                if release_resolved not in seen:
+                    runs.append(release_child)
+                    seen.add(release_resolved)
+    return sorted(runs, key=get_run_display_name)  # final order follows the user-facing display name
 
 
 def count_tlr_scenarios(path: Path) -> int:
@@ -174,7 +287,7 @@ def get_run_info(run_path: Path) -> dict:
     has_score = (run_path / "Score.csv").exists()
     has_parquet = any(run_path.glob("*.parquet"))
     return {
-        "name": run_path.name,
+        "name": get_run_display_name(run_path),
         "path": run_path,
         "size_bytes": size_bytes,
         "mtime": mtime,
@@ -186,23 +299,29 @@ def get_run_info(run_path: Path) -> dict:
 
 def resolve_run_subdirectory(run_name: str) -> Tuple[Optional[Path], str]:
     """
-    Resolve a run directory by name (must be a direct child of data root).
+    Resolve a run by display name, falling back to a path relative to the data root.
     Returns (path, "") on success, or (None, error_message).
     """
     root = get_data_root()
     if not run_name or run_name.strip() != run_name:
         return None, "Invalid run name."
-    if os.sep in run_name or "/" in run_name or ".." in run_name:
+    if "\x00" in run_name or "\\" in run_name:  # "/" and ".." are accepted here; containment is enforced by relative_to() below
         return None, "Invalid run name."
-    run_path = root / run_name
-    if not run_path.exists():
-        return None, f"Run does not exist: {run_name}"
-    if not run_path.is_dir():
-        return None, "Not a directory."
+    display_matches = [path for path in list_run_directories() if get_run_display_name(path) == run_name]
+    if display_matches:  # if several runs share a display name, the first one in list order is returned
+        return display_matches[0], ""
+
+    run_path = (root / run_name).resolve()  # NOTE(review): the check below presumably needs get_data_root() to be absolute and resolved - confirm
     try:
         run_path.relative_to(root)
     except ValueError:
         return None, "Run is not under data root."
+    if run_path == root:  # a name that resolves to the data root itself is not a run
+        return None, "Invalid run name."
+    if not run_path.exists():
+        return None, f"Run does not exist: {run_name}"
+    if not run_path.is_dir():
+        return None, "Not a directory."
     return run_path, ""
 
 
diff --git a/evaluation_dashboard_app/lib/perception_catalog_io.py b/evaluation_dashboard_app/lib/perception_catalog_io.py
index 2b954af..3a59504 100644
--- a/evaluation_dashboard_app/lib/perception_catalog_io.py
+++ b/evaluation_dashboard_app/lib/perception_catalog_io.py
@@ -337,38 +337,62 @@ def build_scene_dataframe_from_pkl_dir(
 
     total = len(pkl_files)
     df = SceneDataFrame(current=pd.DataFrame())
+
+    def _report_progress(done: int) -> None:
+        if on_progress:
+            on_progress(done, total)
+
     for i, pkl_file in enumerate(pkl_files):
-        if str(pkl_file).lower().endswith(".pkl.z"):
-            try:
-                data = joblib.load(pkl_file)
-            except NameError:
-                raise ImportError("joblib is required for .pkl.z: pip install joblib")
-        else:
-            with open(pkl_file, "rb") as f:
-                data = pickle.load(f)
+        try:
+            if str(pkl_file).lower().endswith(".pkl.z"):
+                try:
+                    data = joblib.load(pkl_file)
+                except NameError:
+                    raise ImportError("joblib is required for .pkl.z: pip install joblib")
+            else:
+                with open(pkl_file, "rb") as f:
+                    data = pickle.load(f)
+        except Exception as e:
+            if on_skip:
+                on_skip(pkl_file, f"failed to load: {e}")
+                _report_progress(i + 1)
+                continue
+            raise
         data = _normalize_loaded_pkl(
             data,
             pkl_file=pkl_file,
             project_id=project_id,
             job_id=job_id,
         )
-        df_ = _scenarios_to_df_local(data, scenario_parser_function=scene2df, debug=False)
+        try:
+            df_ = _scenarios_to_df_local(data, scenario_parser_function=scene2df, debug=False)
+        except Exception as e:
+            if on_skip:
+                on_skip(pkl_file, f"failed to convert: {e}")
+                _report_progress(i + 1)
+                continue
+            raise
         del data
         if df_.empty():
             if skip_empty:
                 if on_skip:
                     on_skip(pkl_file, "empty")
+                del df_
+                gc.collect()
+                _report_progress(i + 1)
                 continue
         if skip_bad_dtype and hasattr(df_, "current") and "x_error" in getattr(df_.current, "columns", []):
             if df_.current["x_error"].dtype != "float64":
                 if on_skip:
                     on_skip(pkl_file, f"bad dtype x_error={df_.current['x_error'].dtype}")
+                del df_
+                gc.collect()
+                _report_progress(i + 1)
                 continue
         df = df.concatenate(df_)
         del df_
         gc.collect()
-        if on_progress:
-            on_progress(i + 1, total)
+        _report_progress(i + 1)
     return df
 
 
diff --git a/evaluation_dashboard_app/lib/perception_eval_result_summarizer.py b/evaluation_dashboard_app/lib/perception_eval_result_summarizer.py
index 770fd43..6083498 100644
--- a/evaluation_dashboard_app/lib/perception_eval_result_summarizer.py
+++ b/evaluation_dashboard_app/lib/perception_eval_result_summarizer.py
@@ -498,86 +498,146 @@ def calc_score_single(df, result_directory):
         return {}
     found_gt, pos, prev_frame, uuid_list, obj_idx = False, [], -1, [], 0
     res, obj_group, criteria_max_dist = get_option_and_object_group(result_directory)
+
+    frame_data = {}
     for i in range(total_row_num):
-        if (
-            isnull(df.loc[(i, "ground_truth"), "timestamp"])
-            # or df.loc[(i, "ground_truth"), "frame"] == prev_frame
-        ):
-            continue
+        frame_num = df.loc[(i, "estimation"), "frame"]
+        if isnull(frame_num):
+            frame_num = df.loc[(i, "ground_truth"), "frame"]
 
-        if df.loc[(i, "ground_truth"), "frame"] == prev_frame:
-            obj_idx += 1
-        else:
-            obj_idx = 0
-
-        prev_frame = df.loc[(i, "ground_truth"), "frame"]
-        act_x = df.loc[(i, "ground_truth"), "x"]
-        act_y = df.loc[(i, "ground_truth"), "y"]
-        act_dist = math.sqrt(act_x**2 + act_y**2)
-        act_vx = df.loc[(i, "ground_truth"), "vx"]
-        act_vy = df.loc[(i, "ground_truth"), "vy"]
-        # act_vel = math.sqrt(act_vx**2 + act_vy**2)
-        point = {"x": -act_y, "y": act_x, "dist": act_dist, "vx": -act_vx, "vy": act_vy}
-
-        if act_dist < criteria_max_dist[0]:
-            key = "criteria0"
-            dist_err_torelance = 2
-        elif act_dist < criteria_max_dist[1]:
-            key = "criteria1"
-            dist_err_torelance = 3
-        elif act_dist < criteria_max_dist[2]:
-            key = "criteria2"
-            dist_err_torelance = 5
-        elif act_dist < criteria_max_dist[3]:
-            key = "criteria3"
-            dist_err_torelance = 5
-        else:
-            raise ValueError("act_dist is out of range")
-
-        act_label = df.loc[(i, "ground_truth"), "label"]
-        if not found_gt:
-            found_gt = True
-            res["criteria0"]["GT_OBJ"] = df.loc[(i, "ground_truth"), "label"]
-            res["criteria1"]["GT_OBJ"] = df.loc[(i, "ground_truth"), "label"]
-            res["criteria2"]["GT_OBJ"] = df.loc[(i, "ground_truth"), "label"]
+        if frame_num not in frame_data:
+            frame_data[frame_num] = {"ground_truth": [], "estimation": []}
 
         if not isnull(df.loc[(i, "estimation"), "timestamp"]):
-            est_label = df.loc[(i, "estimation"), "label"]
-            if act_label != "false_positive":
-                est_x = df.loc[(i, "estimation"), "x"]
-                est_y = df.loc[(i, "estimation"), "y"]
-                diff_dist = math.sqrt((act_x - est_x) ** 2 + (act_y - est_y) ** 2)
-                est_uuid = df.loc[(i, "estimation"), "uuid"]
-                if est_uuid not in uuid_list:
-                    uuid_list.append(est_uuid)
-                # print("param:", df.loc[(i, "estimation"), "timestamp"], act_x, act_y, act_label, est_x, est_y, est_label, diff_dist)
+            frame_data[frame_num]["estimation"].append(
+                {
+                    "index": i,
+                    "x": df.loc[(i, "estimation"), "x"],
+                    "y": df.loc[(i, "estimation"), "y"],
+                    "label": df.loc[(i, "estimation"), "label"],
+                    "uuid": df.loc[(i, "estimation"), "uuid"],
+                    "timestamp": df.loc[(i, "estimation"), "timestamp"],
+                }
+            )
 
-                if act_label == est_label:
-                    if diff_dist < dist_err_torelance:
-                        status = "TP/TN"
+        if not isnull(df.loc[(i, "ground_truth"), "timestamp"]):
+            frame_data[frame_num]["ground_truth"].append(
+                {
+                    "index": i,
+                    "x": df.loc[(i, "ground_truth"), "x"],
+                    "y": df.loc[(i, "ground_truth"), "y"],
+                    "label": df.loc[(i, "ground_truth"), "label"],
+                    "vx": df.loc[(i, "ground_truth"), "vx"],
+                    "vy": df.loc[(i, "ground_truth"), "vy"],
+                    "frame": df.loc[(i, "ground_truth"), "frame"],
+                }
+            )
+
+    for frame_num in sorted(frame_data.keys()):
+        frame_gt_list = frame_data[frame_num]["ground_truth"]
+        frame_est_list = frame_data[frame_num]["estimation"]
+
+        for gt_obj in frame_gt_list:
+            i = gt_obj["index"]
+            act_uuid = df.loc[(i, "ground_truth"), "uuid"]
+            est_uuid = ""
+            prev_frame = df.loc[(i, "ground_truth"), "frame"]
+
+            act_x = gt_obj["x"]
+            act_y = gt_obj["y"]
+            act_dist = math.sqrt(act_x**2 + act_y**2)
+            act_vx = gt_obj["vx"]
+            act_vy = gt_obj["vy"]
+            point = {"x": -act_y, "y": act_x, "dist": act_dist, "vx": -act_vx, "vy": act_vy}
+
+            if act_dist < criteria_max_dist[0]:
+                key = "criteria0"
+                dist_err_torelance = 2
+            elif act_dist < criteria_max_dist[1]:
+                key = "criteria1"
+                dist_err_torelance = 3
+            elif act_dist < criteria_max_dist[2]:
+                key = "criteria2"
+                dist_err_torelance = 5
+            elif act_dist < criteria_max_dist[3]:
+                key = "criteria3"
+                dist_err_torelance = 5
+            else:
+                raise ValueError("act_dist is out of range")
+
+            act_label = gt_obj["label"]
+            if not found_gt:
+                found_gt = True
+                res["criteria0"]["GT_OBJ"] = act_label
+                res["criteria1"]["GT_OBJ"] = act_label
+                res["criteria2"]["GT_OBJ"] = act_label
+
+            if not isnull(df.loc[(i, "estimation"), "timestamp"]):
+                est_label = df.loc[(i, "estimation"), "label"]
+                est_uuid = df.loc[(i, "estimation"), "uuid"]
+                if act_label != "false_positive":
+                    est_x = df.loc[(i, "estimation"), "x"]
+                    est_y = df.loc[(i, "estimation"), "y"]
+                    diff_dist = math.sqrt((act_x - est_x) ** 2 + (act_y - est_y) ** 2)
+
+                    if est_uuid not in uuid_list:
+                        uuid_list.append(est_uuid)
+
+                    if act_label == est_label:
+                        if diff_dist < dist_err_torelance:
+                            status = "TP/TN"
+                        else:
+                            status = "ADD"
+                    elif est_label in obj_group[act_label]:
+                        status = "AIL"
                     else:
-                        status = "ADD"
-                elif est_label in obj_group[act_label]:
-                    status = "AIL"
+                        status = "UIL"
                 else:
-                    status = "UIL"
-            else:
-                status = "PFN/PFP"
-            res[key]["OBJ_CNTS"].setdefault(est_label, 0)
-            res[key]["OBJ_CNTS"][est_label] += 1
-        else:
-            if act_label != "false_positive":
-                status = "PFN/PFP"
+                    status = "PFN/PFP"
+                res[key]["OBJ_CNTS"].setdefault(est_label, 0)
+                res[key]["OBJ_CNTS"][est_label] += 1
             else:
-                status = "TP/TN"
-        res[key][status] += 1
-        res[key]["NM"] += 1
-        res[key]["UUID_NUM"] = len(uuid_list)
-        point["status"] = status
-        point["uuid_num"] = len(uuid_list)
-        if obj_idx == len(pos):
-            pos.append([])
-        pos[obj_idx].append(point)
+                if act_label != "false_positive":
+                    closest_dist = float("inf")
+                    closest_est = None
+
+                    for est_obj in frame_est_list:
+                        diff_dist = math.sqrt((act_x - est_obj["x"]) ** 2 + (act_y - est_obj["y"]) ** 2)
+                        if diff_dist < closest_dist:
+                            closest_dist = diff_dist
+                            closest_est = est_obj
+
+                    if closest_est is not None and closest_dist < 1.0:
+                        est_label = closest_est["label"]
+                        est_uuid = closest_est["uuid"]
+
+                        if est_uuid is not None and est_uuid not in uuid_list:
+                            uuid_list.append(est_uuid)
+
+                        if act_label == est_label:
+                            if closest_dist < dist_err_torelance:
+                                status = "TP/TN"
+                            else:
+                                status = "ADD"
+                        elif est_label in obj_group[act_label]:
+                            status = "AIL"
+                        else:
+                            status = "UIL"
+
+                        res[key]["OBJ_CNTS"].setdefault(est_label, 0)
+                        res[key]["OBJ_CNTS"][est_label] += 1
+                    else:
+                        status = "PFN/PFP"
+                else:
+                    status = "TP/TN"
+
+            res[key][status] += 1
+            res[key]["NM"] += 1
+            res[key]["UUID_NUM"] = len(uuid_list)
+            point["status"] = status
+            point["act_uuid"] = act_uuid
+            point["est_uuid"] = est_uuid
+            pos.append(point)
 
     with open(result_directory + "score.json", "w") as file:
         file.write(json.dumps(res, indent=4))
diff --git a/evaluation_dashboard_app/lib/prediction_eval.py b/evaluation_dashboard_app/lib/prediction_eval.py
new file mode 100644
index 0000000..b8f3e4e
--- /dev/null
+++ b/evaluation_dashboard_app/lib/prediction_eval.py
@@ -0,0 +1,555 @@
+from __future__ import annotations
+
+from typing import Callable, Iterable, Sequence
+
+import numpy as np
+import pandas as pd
+
+
+DISTANCE_BIN_LABELS: list[str] = [  # ordered bin labels; also used as the category order of the distance_bin columns
+    "0-20 m",
+    "20-40 m",
+    "40-60 m",
+    "60-80 m",
+    "80-100 m",
+    "100-120 m",
+    "120-140 m",
+    "140-160 m",
+    "160-180 m",
+    "180-200 m",
+    "200+ m",  # catch-all returned by _distance_bin for any value outside the bins above
+]
+
+
+def actor_bucket(label: str | None) -> str:
+    """Map a raw object label (case/whitespace-insensitive) to a coarse actor bucket, "other" if unknown."""
+    key = str(label or "").strip().lower()
+    buckets = (
+        ("vehicle", ("car", "truck", "bus", "trailer")),
+        ("pedestrian", ("pedestrian",)),
+        ("bicycle", ("bicycle", "motorbike", "motorcycle")),
+    )
+    return next((bucket for bucket, members in buckets if key in members), "other")
+
+
+def infer_scenario_context(name: str | None) -> str:
+    """Classify a scenario name into a coarse context by keyword; a keyword must start a word to count."""
+    # Non-alphanumerics become spaces and one leading space is added, so " merge" can no
+    # longer match inside "emergency" and " turn" can no longer match inside "return".
+    text = " " + "".join(ch if ch.isalnum() else " " for ch in str(name or "").lower())
+    rules = (  # first matching rule wins, same priority order as before
+        ("crossing", ("crosswalk", "crossing", "jaywalk")),
+        ("merge", ("merge", "ramp")),
+        ("same-lane", ("same lane", "follow")),
+        ("turning", ("turn", "uturn", "u turn", "left turn", "right turn")),
+        ("cut-in", ("cut in", "cutin", "lane change", "overtake")),
+    )
+    return next((ctx for ctx, tokens in rules if any(f" {t}" in text for t in tokens)), "other")
+
+
+def _metric_label(prefix: str, checkpoint: float | int) -> str:
+    """Format a metric column name such as ``minADE@3s``; whole-number checkpoints are shown without ``.0``."""
+    shown = int(checkpoint) if float(checkpoint).is_integer() else checkpoint
+    return f"{prefix}@{shown}s"
+
+
+def _distance_bin(value: float | int | None) -> str | pd.NA:
+    """Return the 20 m-wide ``DISTANCE_BIN_LABELS`` entry containing *value*, or ``pd.NA`` when it is missing."""
+    if value is None or pd.isna(value):
+        return pd.NA
+    distance = float(value)
+    hits = (lbl for k, lbl in enumerate(DISTANCE_BIN_LABELS[:-1]) if 20 * k <= distance < 20 * (k + 1))
+    # Anything outside [0, 200) - negatives and inf included - lands in the open-ended last label.
+    return next(hits, DISTANCE_BIN_LABELS[-1])
+
+
+def _ensure_numeric(df: pd.DataFrame, columns: Iterable[str]) -> pd.DataFrame:
+    """Return a copy of *df* where each listed column that exists is coerced to numeric (bad values -> NaN)."""
+    result = df.copy()
+    for name in (c for c in columns if c in result.columns):
+        result[name] = pd.to_numeric(result[name], errors="coerce")
+    return result
+
+
+def _noop_progress(_: float, __: str) -> None:
+    """Default progress callback: accept the two positional arguments and ignore them."""
+
+
+def _parse_r_upper_bound(label: object) -> float:
+    """Parse the text after the last ``-`` of *label* as a float; return ``inf`` when it is not a number."""
+    try:
+        return float(str(label).rpartition("-")[2])
+    except ValueError:
+        return float("inf")
+
+
+def prepare_future_matched_df(
+    future_df: pd.DataFrame,
+    *,
+    time_step: float = 1.0,
+    coord_abs_limit: float = 1e6,
+    max_error_m: float = 200.0,
+) -> pd.DataFrame:  # returns one row per EST row joined to the GT row with the same keys, GT uuid and aligned horizon
+    df = future_df.copy()
+    df = _ensure_numeric(df, ("frame_index", "relative_time", "x", "y", "tx", "ty", "mode"))
+    df["frame_index_num"] = df["frame_index"]
+    df["aligned_horizon_sec"] = (df["relative_time"] / max(time_step, 1e-6)).round() * time_step  # snap to the nearest multiple of time_step
+    if {"x", "y"}.issubset(df.columns):
+        df["start_distance_m"] = np.sqrt(df["x"].pow(2) + df["y"].pow(2))
+    else:  # no x/y columns: use the norm of (tx, ty) instead
+        df["start_distance_m"] = np.sqrt(df["tx"].pow(2) + df["ty"].pow(2))
+
+    key_cols = ["suite_name", "scenario_name", "frame_index_num"]
+
+    gt = df[df["source"].astype(str).str.upper() == "GT"].copy()  # source is compared case-insensitively
+    est = df[df["source"].astype(str).str.upper() == "EST"].copy()
+    if "confidence" not in est.columns:
+        est["confidence"] = np.nan  # keeps the column selection in est_h below valid
+
+    gt_start = (  # per GT object: start_distance_m of its row with the smallest relative_time
+        gt.sort_values("relative_time")
+        .groupby(key_cols + ["uuid"], dropna=False)
+        .agg(start_distance_m=("start_distance_m", "first"))
+        .reset_index()
+        .rename(columns={"uuid": "uuid_gt"})
+    )
+
+    gt_h = (  # one GT position per (keys, uuid, horizon); the first duplicate is kept
+        gt[key_cols + ["uuid", "aligned_horizon_sec", "tx", "ty", "label"]]
+        .dropna(subset=["aligned_horizon_sec"])
+        .drop_duplicates(key_cols + ["uuid", "aligned_horizon_sec"])
+        .rename(
+            columns={
+                "uuid": "uuid_gt",
+                "tx": "tx_f_gt",
+                "ty": "ty_f_gt",
+                "label": "label_gt",
+            }
+        )
+    )
+    est_h = (  # one EST position per (keys, uuid, pair_uuid, mode, horizon)
+        est[key_cols + ["uuid", "pair_uuid", "mode", "aligned_horizon_sec", "tx", "ty", "confidence"]]
+        .dropna(subset=["aligned_horizon_sec"])
+        .drop_duplicates(key_cols + ["uuid", "pair_uuid", "mode", "aligned_horizon_sec"])
+        .rename(
+            columns={
+                "uuid": "uuid_est",
+                "pair_uuid": "uuid_gt",  # the estimate's pair_uuid becomes the join key against the GT uuid
+                "tx": "tx_f_est",
+                "ty": "ty_f_est",
+                "confidence": "confidence_est",
+            }
+        )
+    )
+
+    matched = est_h.merge(gt_h, on=key_cols + ["uuid_gt", "aligned_horizon_sec"], how="inner")  # inner join drops EST rows with no GT row at that horizon
+    if matched.empty:
+        return matched  # NOTE: returned without the derived columns added below
+
+    matched = matched.merge(gt_start, on=key_cols + ["uuid_gt"], how="left")
+    matched["track_key"] = (  # "<scenario>::<frame>::<uuid_est>"; a missing frame index is written as -1
+        matched["scenario_name"].astype("string").fillna("")
+        + "::"
+        + matched["frame_index_num"].fillna(-1).astype(int).astype(str)
+        + "::"
+        + matched["uuid_est"].astype("string").fillna("")
+    )
+    matched["disp_error_m"] = np.sqrt(  # Euclidean distance between the EST and GT positions
+        (matched["tx_f_est"] - matched["tx_f_gt"]).pow(2) + (matched["ty_f_est"] - matched["ty_f_gt"]).pow(2)
+    )
+    matched["is_coordinate_outlier"] = (  # any of the four coordinates exceeds coord_abs_limit in magnitude
+        matched[["tx_f_est", "ty_f_est", "tx_f_gt", "ty_f_gt"]].abs().gt(coord_abs_limit).any(axis=1)
+    )
+    matched["is_metric_outlier"] = matched["is_coordinate_outlier"] | matched["disp_error_m"].gt(max_error_m)
+    matched["actor_bucket"] = matched["label_gt"].map(actor_bucket)
+    matched["scenario_context"] = matched["scenario_name"].map(infer_scenario_context)
+    return matched
+
+
+def build_future_mode_track_summary(
+    future_df: pd.DataFrame,
+    *,
+    checkpoints: Sequence[float] = (1.0, 2.0, 3.0),
+    time_step: float = 1.0,
+    coord_abs_limit: float = 1e6,
+    max_error_m: float = 200.0,
+) -> pd.DataFrame:
+    """Match EST rows to GT rows in *future_df* and summarise every matched track.
+
+    Thin wrapper: ``prepare_future_matched_df`` then ``build_future_mode_track_summary_from_matched``.
+    """
+    matching_options = {"time_step": time_step, "coord_abs_limit": coord_abs_limit, "max_error_m": max_error_m}
+    matched = prepare_future_matched_df(future_df, **matching_options)
+    return build_future_mode_track_summary_from_matched(matched, checkpoints=checkpoints)
+
+
+def build_future_mode_track_summary_from_matched(
+    matched: pd.DataFrame,
+    *,
+    checkpoints: Sequence[float] = (1.0, 2.0, 3.0),
+) -> pd.DataFrame:  # one row per track with minADE@<t>s / minFDE@<t>s columns for each checkpoint
+    if matched.empty:
+        return pd.DataFrame(  # empty frame with the base columns only (no metric or bucket columns)
+            columns=[
+                "track_key",
+                "suite_name",
+                "scenario_name",
+                "frame_index_num",
+                "uuid_gt",
+                "uuid_est",
+                "label_gt",
+                "mode_count",
+                "start_distance_m",
+            ]
+        )
+
+    sane = matched[~matched["is_metric_outlier"]].copy()
+    if sane.empty:
+        sane = matched.copy()  # every row is flagged as an outlier: fall back to using all of them
+
+    group_cols = ["track_key", "suite_name", "scenario_name", "frame_index_num", "uuid_gt", "uuid_est", "label_gt"]
+    track_summary = (
+        sane.groupby(group_cols, dropna=False)
+        .agg(
+            mode_count=("mode", "nunique"),
+            start_distance_m=("start_distance_m", "first"),
+            confidence_mean=("confidence_est", "mean"),
+            horizon_max_sec=("aligned_horizon_sec", "max"),
+        )
+        .reset_index()
+    )
+
+    for checkpoint in checkpoints:
+        upto = sane[sane["aligned_horizon_sec"] <= checkpoint].copy()  # rows at or before this checkpoint
+        if upto.empty:
+            track_summary[_metric_label("minADE", checkpoint)] = np.nan
+            track_summary[_metric_label("minFDE", checkpoint)] = np.nan
+            continue
+        ade_mode = (  # per mode: mean displacement error over the horizons kept in upto
+            upto.groupby(group_cols + ["mode"], dropna=False)["disp_error_m"]
+            .mean()
+            .reset_index(name="ade_m")
+        )
+        fde_mode = (  # per mode: error of the row with the largest horizon not exceeding the checkpoint
+            upto.sort_values("aligned_horizon_sec")
+            .groupby(group_cols + ["mode"], dropna=False)
+            .tail(1)[group_cols + ["mode", "disp_error_m"]]
+            .rename(columns={"disp_error_m": "fde_m"})
+        )
+        best_ade = ade_mode.groupby(group_cols, dropna=False)["ade_m"].min().reset_index()  # "min" = best mode per track
+        best_fde = fde_mode.groupby(group_cols, dropna=False)["fde_m"].min().reset_index()
+        track_summary = track_summary.merge(  # left joins: tracks with no rows up to the checkpoint get NaN
+            best_ade.rename(columns={"ade_m": _metric_label("minADE", checkpoint)}),
+            on=group_cols,
+            how="left",
+        ).merge(
+            best_fde.rename(columns={"fde_m": _metric_label("minFDE", checkpoint)}),
+            on=group_cols,
+            how="left",
+        )
+
+    track_summary["actor_bucket"] = track_summary["label_gt"].map(actor_bucket)
+    track_summary["scenario_context"] = track_summary["scenario_name"].map(infer_scenario_context)
+    track_summary["distance_bin"] = pd.Categorical(  # ordered categorical following DISTANCE_BIN_LABELS
+        track_summary["start_distance_m"].map(_distance_bin),
+        categories=DISTANCE_BIN_LABELS,
+        ordered=True,
+    )
+    return track_summary
+
+
+def build_best_mode_horizon_summary(matched_df: pd.DataFrame) -> pd.DataFrame:
+    """Keep, for every (track, horizon) pair, the row of the mode with the smallest displacement error.
+
+    Rows flagged ``is_metric_outlier`` are ignored unless that would leave nothing.
+    Rows whose ``disp_error_m`` is NaN are dropped first: a group made only of NaN
+    errors has no minimum, so ``idxmin`` would hand ``.loc`` a missing label.
+    """
+    columns = ["track_key", "aligned_horizon_sec", "disp_error_m", "scenario_name", "actor_bucket", "scenario_context"]
+    if matched_df.empty:
+        return pd.DataFrame(columns=columns)
+    sane = matched_df[~matched_df["is_metric_outlier"]]
+    if sane.empty:
+        sane = matched_df
+    sane = sane.dropna(subset=["disp_error_m"])
+    if sane.empty:
+        return pd.DataFrame(columns=columns)
+    idx = sane.groupby(["track_key", "aligned_horizon_sec"], dropna=False)["disp_error_m"].idxmin()
+    out = sane.loc[idx, columns]
+    return out.sort_values(["track_key", "aligned_horizon_sec"]).reset_index(drop=True)
+
+
+def build_future_mode_label_summary(
+    track_summary: pd.DataFrame,
+    *,
+    checkpoints: Sequence[float] = (1.0, 2.0, 3.0),
+) -> pd.DataFrame:
+    """Aggregate the per-track summary by ground-truth label, largest track count first.
+
+    Each ``minADE@<t>s`` / ``minFDE@<t>s`` column named by *checkpoints* is averaged per label.
+    """
+    if track_summary.empty:
+        return pd.DataFrame(columns=["Actor", "track_count", "mode_count_mean"])
+    metric_names = [
+        _metric_label(kind, checkpoint) for checkpoint in checkpoints for kind in ("minADE", "minFDE")
+    ]
+    named_aggs: dict[str, tuple[str, str]] = {
+        "track_count": ("track_key", "nunique"),
+        "mode_count_mean": ("mode_count", "mean"),
+        **{metric: (metric, "mean") for metric in metric_names},
+    }
+    per_label = track_summary.groupby("label_gt", dropna=False).agg(**named_aggs).reset_index()
+    per_label = per_label.rename(columns={"label_gt": "Actor"})
+    return per_label.sort_values("track_count", ascending=False)
+
+
+def build_horizon_breakdown(
+    matched_df: pd.DataFrame,
+    *,
+    checkpoints: Sequence[float] | None = None,
+) -> pd.DataFrame:
+    """Tabulate the mean per-track ADE up to each checkpoint plus the mean last-horizon FDE.
+
+    When *checkpoints* is ``None`` every positive ``aligned_horizon_sec`` value present is used.
+    """
+    if matched_df.empty:
+        return pd.DataFrame(columns=["metric", "value_m"])
+    frame = _ensure_numeric(matched_df, ("aligned_horizon_sec", "disp_error_m"))
+    horizons = frame["aligned_horizon_sec"]
+    if checkpoints is None:
+        checkpoints = tuple(sorted(h for h in horizons.dropna().unique() if h > 0))
+    records: list[dict[str, float | str]] = []
+    for limit in checkpoints:
+        window = frame[horizons <= limit]
+        if not window.empty:
+            track_means = window.groupby("track_key", dropna=False)["disp_error_m"].mean()
+            records.append({"metric": _metric_label("ADE", limit), "value_m": float(track_means.mean())})
+    last_rows = frame.sort_values("aligned_horizon_sec").groupby("track_key", dropna=False).tail(1)
+    records.append({"metric": "FDE@final", "value_m": float(last_rows["disp_error_m"].mean())})
+    return pd.DataFrame(records)
+
+
+def enrich_track_summary(
+    track_df: pd.DataFrame,
+    matched_df: pd.DataFrame,
+    current_df: pd.DataFrame | None = None,
+) -> pd.DataFrame:  # returns a copy of track_df with distance, actor_bucket, scenario_context and distance_bin columns
+    enriched = track_df.copy()
+    current_lookup = None
+    if current_df is not None and not current_df.empty:
+        current = current_df.copy()
+        current = _ensure_numeric(current, ("frame_index", "frame_index_num", "center_distance", "center_distance_f"))
+        if "frame_index_num" not in current.columns:
+            current["frame_index_num"] = current["frame_index"]
+        if "uuid_gt" not in current.columns and "uuid" in current.columns:
+            current["uuid_gt"] = current["uuid"]
+        distance_col = "center_distance_f" if "center_distance_f" in current.columns else "center_distance"  # prefer the *_f column when present
+        if distance_col in current.columns:
+            current_lookup = current.rename(columns={distance_col: "current_distance_m"})[
+                ["scenario_name", "frame_index_num", "uuid_gt", "current_distance_m"]
+            ].drop_duplicates()
+
+    matched_lookup = None
+    if not matched_df.empty:
+        matched = matched_df.copy()
+        matched = _ensure_numeric(matched, ("frame_index_num", "tx_f_gt", "ty_f_gt", "start_distance_m"))
+        if "start_distance_m" not in matched.columns and {"tx_f_gt", "ty_f_gt"}.issubset(matched.columns):
+            matched["start_distance_m"] = np.sqrt(matched["tx_f_gt"].pow(2) + matched["ty_f_gt"].pow(2))
+        cols = ["track_key", "start_distance_m"]
+        if {"scenario_name", "frame_index_num", "uuid_gt"}.issubset(matched.columns):
+            cols += ["scenario_name", "frame_index_num", "uuid_gt"]
+        matched_lookup = matched[cols].drop_duplicates()
+
+    if current_lookup is not None and {"scenario_name", "frame_index_num", "uuid_gt"}.issubset(enriched.columns):
+        enriched = enriched.merge(current_lookup, on=["scenario_name", "frame_index_num", "uuid_gt"], how="left")
+    else:
+        enriched["current_distance_m"] = np.nan  # keeps the combine_first calls below valid
+
+    if matched_lookup is not None:
+        join_cols = ["track_key"] if "track_key" in enriched.columns and "track_key" in matched_lookup.columns else []  # prefer joining on track_key
+        if not join_cols and {"scenario_name", "frame_index_num", "uuid_gt"}.issubset(enriched.columns) and {"scenario_name", "frame_index_num", "uuid_gt"}.issubset(matched_lookup.columns):
+            join_cols = ["scenario_name", "frame_index_num", "uuid_gt"]
+        if join_cols:
+            enriched = enriched.merge(  # if enriched already has start_distance_m, pandas suffixes the pair as _x / _y
+                matched_lookup[join_cols + ["start_distance_m"]].drop_duplicates(),
+                on=join_cols,
+                how="left",
+            )
+        else:
+            enriched["start_distance_m"] = np.nan
+    elif "start_distance_m" not in enriched.columns:
+        enriched["start_distance_m"] = np.nan
+
+    if "start_distance_m_x" in enriched.columns:  # merge produced suffixed columns: current distance, then _x, then _y
+        enriched["start_distance_m"] = enriched["current_distance_m"].combine_first(enriched["start_distance_m_x"])
+        if "start_distance_m_y" in enriched.columns:
+            enriched["start_distance_m"] = enriched["start_distance_m"].combine_first(enriched["start_distance_m_y"])
+        enriched = enriched.drop(columns=[c for c in ("start_distance_m_x", "start_distance_m_y") if c in enriched.columns])
+    else:  # single column: current distance wins wherever it is not NaN
+        enriched["start_distance_m"] = enriched["current_distance_m"].combine_first(enriched["start_distance_m"])
+
+    label_col = "label_gt" if "label_gt" in enriched.columns else "label"
+    enriched["actor_bucket"] = enriched[label_col].map(actor_bucket)
+    enriched["scenario_context"] = enriched["scenario_name"].map(infer_scenario_context)
+    enriched["distance_bin"] = pd.Categorical(  # ordered categorical following DISTANCE_BIN_LABELS
+        enriched["start_distance_m"].map(_distance_bin),
+        categories=DISTANCE_BIN_LABELS,
+        ordered=True,
+    )
+    return enriched
+
+
+def build_distance_bin_metrics(track_df: pd.DataFrame) -> pd.DataFrame:
+    """Summarise track count, mean ADE/FDE and the 90th/95th FDE percentiles for every distance bin."""
+
+    def _fde_quantile(q: float):
+        # NaN for a bin with no non-null FDE values, otherwise the q-quantile.
+        return lambda s: s.quantile(q) if len(s.dropna()) else np.nan
+
+    # Reuse an existing distance_bin column; otherwise derive it from start_distance_m.
+    has_bins = "distance_bin" in track_df.columns
+    raw_bins = track_df["distance_bin"] if has_bins else track_df["start_distance_m"].map(_distance_bin)
+    data = track_df.copy()
+    data["distance_bin"] = pd.Categorical(raw_bins, categories=DISTANCE_BIN_LABELS, ordered=True)
+    return data.groupby("distance_bin", observed=False).agg(
+        count=("track_key", "nunique"),
+        ade_m=("ade_m", "mean"),
+        fde_m=("fde_m", "mean"),
+        p90_fde_m=("fde_m", _fde_quantile(0.90)),
+        p95_fde_m=("fde_m", _fde_quantile(0.95)),
+    ).reset_index()
+
+
+def build_specsheet_aligned_prediction_artifacts(
+    future_df: pd.DataFrame,
+    *,
+    checkpoints: Sequence[float] = (1.0, 3.0, 5.0),
+    time_step: float = 0.1,  # NOTE(review): not referenced anywhere in this function body
+    max_error_m: float = 100.0,  # NOTE(review): not referenced anywhere in this function body
+    progress_callback: Callable[[float, str], None] | None = None,
+) -> dict[str, pd.DataFrame]:
+    from perception_catalog_analyzer.specsheet.blocks import bin_polar
+    from perception_catalog_analyzer.specsheet.metrics import load_metrics
+    from perception_catalog_analyzer.specsheet.metrics.functional import FUTURE_ARRAY_CACHE
+    # Returns the "label_summary", "distance_summary" and "polar_summary" tables built from ``future_df``.
+    report = progress_callback or _noop_progress
+    FUTURE_ARRAY_CACHE.clear()  # start every call with an empty shared cache
+    metric_order = [_metric_label(prefix, checkpoint) for prefix in ("minADE", "minFDE") for checkpoint in checkpoints]
+    metric_map = {metric.name: metric for metric in load_metrics(metric_order)}
+
+    report(0.02, "Binning rows in the same polar grid used by the specsheet...")
+    normalized_future = _ensure_numeric(
+        future_df,
+        ("frame_index", "relative_time", "x", "y", "tx", "ty", "mode", "confidence"),
+    )
+    required_future_cols = ["source", "label", "uuid", "pair_uuid", "frame_index", "relative_time", "tx", "ty"]
+    present_required_cols = [col for col in required_future_cols if col in normalized_future.columns]
+    if present_required_cols:
+        normalized_future = normalized_future.dropna(subset=present_required_cols)
+    normalized_future = normalized_future.sort_values(
+        [col for col in ["label", "frame_index", "pair_uuid", "uuid", "mode", "relative_time"] if col in normalized_future.columns],
+        kind="stable",
+    ).reset_index(drop=True)
+    # ``bin_polar`` receives a copy; its result is expected to carry the ``r`` / ``theta`` columns used below.
+    binned_future = bin_polar(normalized_future.copy())
+    if binned_future.empty:
+        report(0.9, "No future rows were available after binning.")
+        empty = pd.DataFrame()  # the same empty frame object is returned under all three keys
+        return {
+            "label_summary": empty,
+            "distance_summary": empty,
+            "polar_summary": empty,
+        }
+
+    labels = sorted(str(v) for v in binned_future["label"].dropna().unique() if str(v).strip())
+    total_labels = max(len(labels), 1)  # max(..., 1) keeps the progress divisions below safe
+    total_metrics = max(len(metric_order), 1)
+
+    label_rows: list[dict[str, object]] = []
+    distance_rows: list[dict[str, object]] = []
+    polar_rows: list[dict[str, object]] = []
+
+    report(0.28, f"Found {len(labels)} labels to aggregate.")
+    for label_idx, label_name in enumerate(labels, start=1):
+        # Labels share the 0.30-0.82 slice of the progress range in equal parts.
+        label_start = 0.3 + (0.52 * (label_idx - 1) / total_labels)
+        label_end = 0.3 + (0.52 * label_idx / total_labels)
+        report(label_start, f"Aggregating label `{label_name}` ({label_idx}/{total_labels})...")
+        scoped = binned_future[binned_future["label"].astype(str) == label_name].copy()
+        est_scoped = scoped[scoped["source"].astype(str).str.upper() == "EST"].copy()
+        # First 35% of the label's slice: apply every metric once per (r, theta) bin.
+        label_groups = list(scoped.groupby(["r", "theta"], observed=True))
+        total_groups = max(len(label_groups), 1)
+        for group_idx, ((r_name, theta_name), sub_df) in enumerate(label_groups, start=1):
+            warmup_progress = label_start + ((label_end - label_start) * 0.35 * group_idx / total_groups)
+            report(
+                warmup_progress,
+                f"Preparing label `{label_name}` ({label_idx}/{total_labels}) future arrays: bin `{r_name}` / `{theta_name}` ({group_idx}/{total_groups})...",
+            )
+            for metric_name in metric_order:
+                metric = metric_map[metric_name]
+                metric.apply(sub_df)  # result discarded; presumably this pre-fills FUTURE_ARRAY_CACHE - TODO confirm
+
+        row: dict[str, object] = {
+            "label": label_name,
+            "future_rows": int(est_scoped[["scenario_name", "frame_index", "uuid"]].drop_duplicates().shape[0])
+            if {"scenario_name", "frame_index", "uuid"}.issubset(est_scoped.columns)
+            else int(len(est_scoped)),  # fallback: plain EST row count when the key columns are missing
+        }
+        for metric_idx, metric_name in enumerate(metric_order, start=1):
+            metric_progress = label_start + ((label_end - label_start) * (0.35 + (0.65 * metric_idx / total_metrics)))
+            report(
+                metric_progress,
+                f"Aggregating label `{label_name}` ({label_idx}/{total_labels}), metric `{metric_name}` ({metric_idx}/{total_metrics})...",
+            )
+            metric = metric_map[metric_name]
+            metric_df = metric.apply(scoped)
+            each_bin_df = metric.get_each_bin(metric_df)
+            around_df = metric.get_all_around(scoped).dropna(subset=[metric_name]).copy()
+            near_mask = around_df["r"].map(_parse_r_upper_bound) <= 60.0  # keep bins whose parsed upper bound is <= 60.0
+            near_values = around_df.loc[near_mask, metric_name].dropna()
+            row[metric_name] = float(np.nanmean(near_values.to_numpy(dtype=float))) if not near_values.empty else None
+            # Long-format rows: one per (label, metric, r) for the distance table.
+            if not around_df.empty:
+                for rec in around_df[["r", metric_name]].to_dict("records"):
+                    distance_rows.append(
+                        {
+                            "label": label_name,
+                            "metric": metric_name,
+                            "r": rec["r"],
+                            "value": rec[metric_name],
+                        }
+                    )
+            # Long-format rows: one per (label, metric, r, theta) for the polar table.
+            polar_df = each_bin_df.dropna(subset=[metric_name]).copy()
+            if not polar_df.empty:
+                polar_df["label"] = label_name
+                polar_df["metric"] = metric_name
+                polar_df = polar_df.rename(columns={metric_name: "value"})
+                polar_rows.extend(polar_df[["label", "metric", "r", "theta", "value"]].to_dict("records"))
+
+        label_rows.append(row)
+
+    label_summary = pd.DataFrame(label_rows)
+    if not label_summary.empty:
+        report(0.86, "Finalizing overall summary row...")
+        total_rows = float(label_summary["future_rows"].sum())
+        overall_row: dict[str, object] = {
+            "label": "All",
+            "future_rows": int(total_rows),
+        }
+        for metric_name in metric_order:
+            # "All" value = mean of the per-label values weighted by each label's ``future_rows``.
+            valid = label_summary[["future_rows", metric_name]].dropna()
+            if valid.empty or float(valid["future_rows"].sum()) <= 0:
+                overall_row[metric_name] = None
+            else:
+                overall_row[metric_name] = float(
+                    (valid["future_rows"] * valid[metric_name]).sum() / valid["future_rows"].sum()
+                )
+        label_summary = pd.concat([pd.DataFrame([overall_row]), label_summary], ignore_index=True)  # "All" row goes first
+    report(0.9, "Prediction summary tables are ready for cache save.")
+
+    return {
+        "label_summary": label_summary,
+        "distance_summary": pd.DataFrame(distance_rows),
+        "polar_summary": pd.DataFrame(polar_rows),
+    }
diff --git a/evaluation_dashboard_app/lib/release_specsheet_library.py b/evaluation_dashboard_app/lib/release_specsheet_library.py
new file mode 100644
index 0000000..ce51f84
--- /dev/null
+++ b/evaluation_dashboard_app/lib/release_specsheet_library.py
@@ -0,0 +1,165 @@
+from __future__ import annotations
+
+import urllib.parse
+from pathlib import Path
+from typing import Any
+
+import yaml
+
+from lib.path_utils import get_run_display_name, path_display
+
+
+RELEASE_ROLE_DIRS = ("performance", "usecase", "devops")  # role sub-folders looked up inside each release folder
+DEFAULT_EVALUATOR_PROJECT_ID = "x2_dev"  # used when a role's metadata carries no project_id
+EVALUATOR_REPORT_BASE_URL = "https://evaluation.tier4.jp/evaluation/reports"  # report URL = base + "/<job_id>?project_id=..."
+
+
+def _overview_query(run_path: Path) -> str:  # query string selecting this run in single-run mode
+    return urllib.parse.urlencode([("mode", "single"), ("run_a", get_run_display_name(run_path))])
+
+
+def _safe_url_part(value: str, fallback: str) -> str:
+    """Replace runs of characters other than word characters, ``.`` and ``-`` with ``_``; fall back if empty."""
+    import re
+    cleaned = re.sub(r"[^\w.\-]+", "_", str(value or ""))
+    return cleaned.strip("._") or fallback
+
+
+def _load_yaml(path: Path) -> dict[str, Any]:  # YAML mapping stored at ``path``; {} when absent, unparsable or not a mapping
+    if not path.exists():
+        return {}
+    try:
+        parsed = yaml.safe_load(path.read_text(encoding="utf-8"))
+    except Exception:  # any read or parse failure is treated as "no data"
+        return {}
+    return parsed if isinstance(parsed, dict) else {}
+
+
+def _role_metadata(role_dir: Path) -> dict[str, Any]:
+    """Return the role's ``metadata.yaml`` mapping, or ``resources/metadata.yaml`` when the former is empty."""
+    primary = _load_yaml(role_dir / "metadata.yaml")
+    fallback_path = role_dir / "resources" / "metadata.yaml"
+    return primary or _load_yaml(fallback_path)
+
+
+def _evaluator_report_url(job_id: str, project_id: str = DEFAULT_EVALUATOR_PROJECT_ID) -> str:
+    """Return the evaluator report URL for ``job_id``, or ``""`` when ``job_id`` is empty."""
+    # The project id travels as the only query parameter of the report URL.
+    query = urllib.parse.urlencode({"project_id": project_id})
+    return f"{EVALUATOR_REPORT_BASE_URL}/{job_id}?{query}" if job_id else ""
+
+
+def _pdf_static_url(release_name: str, topic_name: str) -> str:
+    """Return the ``/app/static/release_specs/...`` URL of a topic PDF, built from sanitized name parts."""
+    parts = (_safe_url_part(release_name, "release"), _safe_url_part(topic_name, "topic"))
+    return f"/app/static/release_specs/{parts[0]}/{parts[1]}.pdf"
+
+
+def discover_release_specsheet_inventory(data_root: Path) -> list[dict[str, Any]]:
+    """List every ``release_spec_*`` folder under ``data_root`` with its metadata, PDFs and role runs.
+
+    Each returned row holds the release metadata fields, a ``pdfs`` list with one entry per
+    specsheet PDF, and a ``roles`` mapping for the ``RELEASE_ROLE_DIRS`` sub-folders that exist.
+    """
+    rows: list[dict[str, Any]] = []
+    for release_dir in sorted(data_root.glob("release_spec_*")):
+        if not release_dir.is_dir():
+            continue
+        # Shared tolerant loader: a missing, unparsable or non-mapping file yields {}.
+        metadata = _load_yaml(release_dir / "metadata.yaml")
+        release_name = release_dir.name.replace("release_spec_", "", 1)
+
+        specsheet_root = release_dir / "specsheet"
+        # PDFs lying directly in ``specsheet/`` are listed only when no topic sub-folder has one.
+        has_topic_pdfs = any(path.is_file() or path.is_symlink() for path in specsheet_root.glob("*/*.pdf"))
+        pdfs: list[dict[str, Any]] = []
+        for pdf_path in sorted(specsheet_root.glob("**/*.pdf")):
+            if pdf_path.parent == specsheet_root and has_topic_pdfs:
+                continue
+            topic = pdf_path.parent.name if pdf_path.parent != specsheet_root else "default"
+            # NOTE: the static copy is looked up relative to the current working directory.
+            static_path = (
+                Path.cwd()
+                / "static"
+                / "release_specs"
+                / _safe_url_part(release_name, "release")
+                / f"{_safe_url_part(topic, 'topic')}.pdf"
+            )
+            pdfs.append(
+                {
+                    "topic": topic,
+                    "path": pdf_path,
+                    "display_path": path_display(pdf_path),
+                    "absolute_path": str(pdf_path.resolve()),
+                    "static_path": static_path,
+                    "static_url": _pdf_static_url(release_name, topic),
+                    "available": pdf_path.exists() and not pdf_path.is_dir(),
+                    "static_available": static_path.exists() and not static_path.is_dir(),
+                }
+            )
+
+        # Only role folders that actually exist on disk are reported.
+        roles: dict[str, dict[str, Any]] = {}
+        for role in RELEASE_ROLE_DIRS:
+            role_dir = release_dir / role
+            if not role_dir.is_dir():
+                continue
+            role_metadata = _role_metadata(role_dir)
+            job_id = str(role_metadata.get("job_id") or "").strip()
+            project_id = str(role_metadata.get("project_id") or DEFAULT_EVALUATOR_PROJECT_ID).strip()
+            overview_query = _overview_query(role_dir)  # built once, reused for the URL below
+            roles[role] = {
+                "path": role_dir,
+                "display_path": path_display(role_dir),
+                "absolute_path": str(role_dir.resolve()),
+                "run_name": get_run_display_name(role_dir),
+                "overview_query": overview_query,
+                "overview_url": f"/?{overview_query}",
+                "job_id": job_id,
+                "project_id": project_id,
+                "evaluator_report_url": _evaluator_report_url(job_id, project_id),
+                "has_parquet": any(role_dir.glob("*.parquet")),
+                "has_summary": (role_dir / "summary.json").exists() or (role_dir / "resources" / "summary.json").exists(),
+                "has_metadata": (role_dir / "metadata.yaml").exists() or (role_dir / "resources" / "metadata.yaml").exists(),
+            }
+
+        # The first PDF in sorted path order doubles as the release's main PDF.
+        rows.append(
+            {
+                "release_dir": release_dir,
+                "release_dir_display": path_display(release_dir),
+                "release_dir_absolute": str(release_dir.resolve()),
+                "release": release_name,
+                "version": metadata.get("pilot_auto_version") or metadata.get("version_abbr") or "",
+                "date": metadata.get("date") or "",
+                "description": metadata.get("description") or "",
+                "data_count": metadata.get("data_count") or "",
+                "roles": roles,
+                "pdfs": pdfs,
+                "pdf_topics": ", ".join(pdf["topic"] for pdf in pdfs),
+                "main_pdf_url": next((pdf["static_url"] for pdf in pdfs), ""),
+                "main_pdf_path": next((pdf["display_path"] for pdf in pdfs), ""),
+            }
+        )
+    return rows
+
+
+def discover_ready_release_specsheets(data_root: Path) -> list[dict[str, Any]]:
+    """One row per specsheet PDF, tagged with the release's default run (``performance`` role if listed)."""
+    rows: list[dict[str, Any]] = []
+    for release in discover_release_specsheet_inventory(data_root):
+        roles = release["roles"]
+        default_run = roles.get("performance") or next(iter(roles.values()), {})
+        shared = {key: release[key] for key in ("release", "version", "date", "description")}
+        rows.extend(
+            {
+                "release_dir": release["release_dir"],
+                "pdf_path": pdf["path"],
+                **shared,
+                "topic": pdf["topic"],
+                "view_run": default_run.get("run_name", ""),
+                "overview_query": default_run.get("overview_query", ""),
+            }
+            for pdf in release["pdfs"]
+        )
+    return rows
diff --git a/evaluation_dashboard_app/lib/run_loader.py b/evaluation_dashboard_app/lib/run_loader.py
index d240511..fc99168 100644
--- a/evaluation_dashboard_app/lib/run_loader.py
+++ b/evaluation_dashboard_app/lib/run_loader.py
@@ -1,5 +1,6 @@
 from pathlib import Path
 import pandas as pd
+from lib.score_schema import read_score_csv
 
 SUMMARY_DTYPES = {
     "id": "string",
@@ -29,17 +30,7 @@ def load_run(run_dir: Path):
     if not summary_path.exists():
         if _has_parquet_files(run_dir):
             # Parquet-only run: allow load for Detection Stats and Bounding Box Viewer
-            score = pd.read_csv(
-                score_path,
-                header=None,
-                engine="python",
-                names=[
-                    "Scenario", "Option", "GT_OBJ", "Distance0", "NM0", "TP/TN0", "ADD0", "AIL0", "UIL0", "PFN/PFP0", "UUID Num0", "Practical Pass Rate0", "MAX_DIST_THRESH0", "OBJ_CNTS0",
-                    "Distance1", "NM1", "TP/TN1", "ADD1", "AIL1", "UIL1", "PFN/PFP1", "UUID Num1", "Practical Pass Rate1", "MAX_DIST_THRESH1", "OBJ_CNTS1",
-                    "Distance2", "NM2", "TP/TN2", "ADD2", "AIL2", "UIL2", "PFN/PFP2", "UUID Num2", "Practical Pass Rate2", "MAX_DIST_THRESH2", "OBJ_CNTS2",
-                    "Distance3", "NM3", "TP/TN3", "ADD3", "AIL3", "UIL3", "PFN/PFP3", "UUID Num3", "Practical Pass Rate3", "MAX_DIST_THRESH3", "OBJ_CNTS3",
-                ]
-            ) if score_path.exists() else None
+            score = read_score_csv(score_path)
             return {
                 "path": run_dir,
                 "summary": None,
@@ -60,17 +51,7 @@ def load_run(run_dir: Path):
         if col not in summary.columns:
             summary[col] = pd.Series([""] * len(summary), dtype="string")
 
-    score = pd.read_csv(
-        score_path,
-        header=None,
-        engine="python",
-        names=[
-            "Scenario", "Option", "GT_OBJ", "Distance0", "NM0", "TP/TN0", "ADD0", "AIL0", "UIL0", "PFN/PFP0", "UUID Num0", "Practical Pass Rate0", "MAX_DIST_THRESH0", "OBJ_CNTS0",
-            "Distance1", "NM1", "TP/TN1", "ADD1", "AIL1", "UIL1", "PFN/PFP1", "UUID Num1", "Practical Pass Rate1", "MAX_DIST_THRESH1", "OBJ_CNTS1",
-            "Distance2", "NM2", "TP/TN2", "ADD2", "AIL2", "UIL2", "PFN/PFP2", "UUID Num2", "Practical Pass Rate2", "MAX_DIST_THRESH2", "OBJ_CNTS2",
-            "Distance3", "NM3", "TP/TN3", "ADD3", "AIL3", "UIL3", "PFN/PFP3", "UUID Num3", "Practical Pass Rate3", "MAX_DIST_THRESH3", "OBJ_CNTS3",
-        ]
-    ) if score_path.exists() else None
+    score = read_score_csv(score_path)
 
     return {
         "path": run_dir,
diff --git a/evaluation_dashboard_app/lib/run_metadata.py b/evaluation_dashboard_app/lib/run_metadata.py
new file mode 100644
index 0000000..f5821b1
--- /dev/null
+++ b/evaluation_dashboard_app/lib/run_metadata.py
@@ -0,0 +1,391 @@
+"""Helpers for durable per-run metadata stored alongside local run folders."""
+
+from __future__ import annotations
+
+import json
+import os
+from datetime import datetime, timezone
+from pathlib import Path
+from tempfile import NamedTemporaryFile
+from typing import Any, Dict, Iterable, Optional
+
+from lib.path_utils import get_data_root, path_display, to_data_relative
+
+RUN_METADATA_FILENAME = ".run_metadata.json"  # name of the metadata document stored inside a run folder
+RUN_METADATA_SCHEMA_VERSION = 1  # written into every payload as "schema_version"
+
+
+def _utc_now_iso() -> str:  # current UTC time as an ISO-8601 string with whole-second precision
+    return datetime.now(timezone.utc).isoformat(timespec="seconds")
+
+
+def _json_safe(value: Any) -> Any:
+    """Recursively turn dicts, lists/tuples, ``Path`` and ``datetime`` values into JSON-friendly ones."""
+    if isinstance(value, Path):
+        return str(value)
+    if isinstance(value, datetime):
+        aware = value if value.tzinfo is not None else value.replace(tzinfo=timezone.utc)
+        return aware.astimezone(timezone.utc).isoformat(timespec="seconds")
+    if isinstance(value, dict):
+        return {str(name): _json_safe(item) for name, item in value.items()}
+    if isinstance(value, (list, tuple)):
+        return list(map(_json_safe, value))
+    return value
+
+
+def _deep_merge(base: Dict[str, Any], patch: Dict[str, Any]) -> Dict[str, Any]:
+    """Return a copy of ``base`` updated with ``patch``; dicts present on both sides merge recursively."""
+    result = dict(base)
+    for key, incoming in patch.items():
+        current = result.get(key)
+        both_dicts = isinstance(incoming, dict) and isinstance(current, dict)
+        result[key] = _deep_merge(current, incoming) if both_dicts else incoming
+    return result
+
+
+def normalize_run_path(path_like: str | Path, *, allow_missing: bool = True) -> Optional[Path]:
+    """Resolve ``path_like`` (relative paths start at the data root) or return ``None`` if it is unusable."""
+    text = str(path_like or "").strip()
+    if not text:
+        return None
+    try:
+        target = Path(text)
+        if not target.is_absolute():
+            target = get_data_root() / target
+        target = target.resolve(strict=False)
+        # Anything that resolves outside the data root is rejected.
+        try:
+            target.relative_to(get_data_root())
+        except ValueError:
+            return None
+        return target if allow_missing or target.exists() else None
+    except Exception:
+        return None
+
+
+def find_run_directory(path_like: str | Path, *, create_missing: bool = False) -> Optional[Path]:
+    """Map a path under the data root to its top-level run folder (first component below the root)."""
+    resolved = normalize_run_path(path_like, allow_missing=True)
+    if resolved is None:
+        return None
+    try:
+        parts = resolved.relative_to(get_data_root()).parts
+    except ValueError:
+        return None
+    if not parts:
+        return None
+    run_dir = get_data_root() / parts[0]
+    if create_missing:
+        run_dir.mkdir(parents=True, exist_ok=True)
+        return run_dir
+    return run_dir if run_dir.exists() else None
+
+
+def resolve_run_directory_from_task_parameters(
+    parameters: Dict[str, Any],
+    *,
+    create_missing: bool = False,
+) -> Optional[Path]:
+    """Return the run folder for the first path-like task parameter that resolves to one, else ``None``."""
+    keys = ("output_path", "output_dir", "eval_root", "pkl_dir", "result_path")
+    for path_value in map(parameters.get, keys):
+        if path_value:
+            run_dir = find_run_directory(path_value, create_missing=create_missing)
+            if run_dir is not None:
+                return run_dir
+    return None
+
+
+def metadata_path_for_run(run_path: Path) -> Path:  # the metadata file sits directly inside the run folder
+    return run_path.joinpath(RUN_METADATA_FILENAME)
+
+
+def read_run_metadata(run_path: Path) -> Dict[str, Any]:
+    """Load the run's metadata JSON; a missing, unreadable or non-object file yields ``{}``."""
+    meta_path = metadata_path_for_run(run_path)
+    if not meta_path.exists():
+        return {}
+    try:
+        payload = json.loads(meta_path.read_text(encoding="utf-8"))
+    except Exception:
+        return {}
+    return payload if isinstance(payload, dict) else {}
+
+
+def write_run_metadata(run_path: Path, metadata: Dict[str, Any], *, create_missing: bool = False) -> Dict[str, Any]:
+    """Atomically write ``metadata`` plus bookkeeping fields to the run's metadata file; return the payload."""
+    if create_missing:
+        run_path.mkdir(parents=True, exist_ok=True)
+    elif not run_path.exists():
+        raise FileNotFoundError(str(run_path))
+    payload = dict(metadata)
+    payload["schema_version"] = RUN_METADATA_SCHEMA_VERSION
+    payload["run_name"] = run_path.name
+    payload["run_path"] = to_data_relative(run_path)
+    payload["run_path_display"] = path_display(run_path)
+    payload["updated_at"] = _utc_now_iso()
+    payload.setdefault("created_at", payload["updated_at"])
+    # Serialize before creating the temp file so an unserializable value leaves nothing on disk.
+    text = json.dumps(_json_safe(payload), ensure_ascii=False, indent=2, sort_keys=True) + "\n"
+    tmp = NamedTemporaryFile("w", encoding="utf-8", dir=str(run_path), delete=False)
+    tmp_path = Path(tmp.name)
+    try:
+        with tmp:
+            tmp.write(text)
+        try:
+            os.chmod(tmp_path, 0o644)  # best effort; the mode survives the rename below
+        except Exception:
+            pass
+        tmp_path.replace(metadata_path_for_run(run_path))
+    except BaseException:
+        tmp_path.unlink(missing_ok=True)  # never leave a stray temp file in the run folder
+        raise
+    return payload
+
+
+def upsert_run_metadata(run_path: Path, patch: Dict[str, Any], *, create_missing: bool = False) -> Dict[str, Any]:
+    """Deep-merge ``patch`` into the stored metadata of ``run_path`` and write the result back."""
+    merged = _deep_merge(read_run_metadata(run_path), _json_safe(patch))
+    # A creation time is stamped only when neither the stored data nor the patch has one.
+    merged.setdefault("created_at", _utc_now_iso())
+    return write_run_metadata(run_path, merged, create_missing=create_missing)
+
+
+def flatten_metadata_text(value: Any) -> Iterable[str]:
+    """Flatten nested metadata into a list of strings (dict keys included, blank scalars dropped)."""
+    if value is None:
+        return []
+    if isinstance(value, dict):
+        collected = []
+        # Each key is emitted ahead of the flattened form of its value.
+        for key, item in value.items():
+            collected.append(str(key))
+            collected += flatten_metadata_text(item)
+        return collected
+    if isinstance(value, (list, tuple, set)):
+        return [text for item in value for text in flatten_metadata_text(item)]
+    # Scalars: keep the stripped string form unless it is blank.
+    text = str(value).strip()
+    return [text] if text else []
+
+
+def build_run_search_blob(run_path: Path, metadata: Dict[str, Any], extra_values: Optional[Iterable[Any]] = None) -> str:
+    """Join the run's name and paths, its flattened metadata and ``extra_values`` into one lowercase string."""
+    parts = [run_path.name, to_data_relative(run_path), path_display(run_path)]
+    parts += flatten_metadata_text(metadata)
+    for value in extra_values or ():
+        parts.extend(flatten_metadata_text(value))
+    return " ".join(filter(None, parts)).lower()
+
+
+def _as_dict(value: Any) -> Dict[str, Any]:
+    """Coerce ``value`` to a dict: dicts pass through unchanged and strings holding a JSON
+    object are parsed; everything else (including unparsable strings) becomes ``{}``.
+    """
+    if isinstance(value, str):
+        try:
+            value = json.loads(value)
+        except Exception:
+            return {}
+    return value if isinstance(value, dict) else {}
+
+
+def resolve_run_directory_from_task_row(task_row: Dict[str, Any]) -> Optional[Path]:
+    """Locate the run folder of a task row via its parameters, then ``result_path``, then its result summary."""
+    params = _as_dict(task_row.get("parameters"))
+    run_dir = resolve_run_directory_from_task_parameters(params, create_missing=False)
+    if run_dir is not None:
+        return run_dir
+    summary = _as_dict(task_row.get("result_summary"))
+    # A ``result_path`` that does not resolve must not short-circuit the summary fallbacks.
+    summary_keys = ("output_path", "summary_path", "parquet_path")
+    candidates = [task_row.get("result_path"), *(summary.get(key) for key in summary_keys)]
+    for path_value in candidates:
+        if path_value:
+            run_dir = find_run_directory(path_value, create_missing=False)
+            if run_dir is not None:
+                return run_dir
+    return None
+
+
+def build_metadata_patch_from_task_row(task_row: Dict[str, Any]) -> Dict[str, Any]:  # run-metadata patch derived from one task-history row
+    params = _as_dict(task_row.get("parameters"))  # dict or JSON-object string; anything else becomes {}
+    summary = _as_dict(task_row.get("result_summary"))
+    task_type = str(task_row.get("type") or "").strip()
+    request_output = str(  # first non-empty path-like value, in this priority order
+        params.get("output_path")
+        or params.get("output_dir")
+        or params.get("eval_root")
+        or params.get("pkl_dir")
+        or task_row.get("result_path")
+        or ""
+    ).strip()
+    # Sections written for every task type: source_mode, task, request, backfilled_from_task_history.
+    patch: Dict[str, Any] = {
+        "source_mode": task_type,
+        "task": {
+            "id": str(task_row.get("id") or "").strip(),
+            "type": task_type,
+            "status": str(task_row.get("status") or "").strip(),
+            "requested_by": str(task_row.get("session_id") or "").strip(),
+            "created_at": task_row.get("created_at"),
+            "updated_at": task_row.get("updated_at"),
+            "result_path": str(task_row.get("result_path") or "").strip(),
+            "error_message": str(task_row.get("error_message") or "").strip(),
+            "progress_message": str(task_row.get("progress_message") or "").strip(),
+            "progress_pct": task_row.get("progress_pct"),
+        },
+        "request": {
+            "environment": str(params.get("environment") or "default").strip() or "default",
+            "project_id": str(params.get("project_id") or "").strip(),
+            "job_id": str(params.get("job_id") or "").strip(),
+            "catalog_id": str(params.get("catalog_id") or "").strip(),
+            "integration_id": str(params.get("integration_id") or "").strip(),
+            "source_job_id": str(params.get("source_job_id") or "").strip(),
+            "target_name": str(params.get("target_name") or "").strip(),
+            "description": str(params.get("description") or "").strip(),
+            "suite_id": str(params.get("suite_id") or "").strip(),
+            "suite_ids": list(params.get("suite_ids") or []),
+            "download_type": str(params.get("download_type") or "").strip(),
+            "phase": str(params.get("phase") or "").strip(),
+            "skip_large_file": bool(params.get("skip_large_file", False)),
+            "large_file_mb": params.get("large_file_mb"),
+            "keep_zip_files": bool(params.get("keep_zip_files", False)),
+            "run_eval": bool(params.get("run_eval", False)),
+            "generate_parquet": bool(params.get("generate_parquet", False)),
+            "eval_recursive": bool(params.get("eval_recursive", False)),
+            "eval_overwrite": bool(params.get("eval_overwrite", False)),
+            "max_retries": params.get("max_retries"),
+            "clean_build": bool(params.get("clean_build", False)),
+            "debug": bool(params.get("debug", False)),
+            "is_tag": bool(params.get("is_tag", False)),
+            "scenario_name_filter": str(params.get("scenario_name_filter") or "").strip(),
+            "selected_ids": list(params.get("selected_ids") or []),
+            "output_path": request_output,
+            "parameters": params,  # the parsed parameters are also kept verbatim
+        },
+        "backfilled_from_task_history": True,
+    }
+    # Type-specific sections; a task type not listed below adds nothing further.
+    if task_type == "download_results":
+        patch["download"] = {
+            "mode": "download_results",
+            "total": summary.get("total", 0),
+            "success": summary.get("success", 0),
+            "failed": summary.get("failed", 0),
+            "rows": list(summary.get("rows") or [])[:100],  # at most the first 100 rows are kept
+            "download_type": str(params.get("download_type") or "").strip(),
+            "phase": str(params.get("phase") or "").strip(),
+            "skip_large_file": bool(params.get("skip_large_file", False)),
+            "large_file_mb": params.get("large_file_mb"),
+            "keep_zip_files": bool(params.get("keep_zip_files", False)),
+        }
+    elif task_type == "download_scenarios":
+        patch["scenario_download"] = {
+            "total": summary.get("total", 0),
+            "success": summary.get("success", 0),
+            "failed": summary.get("failed", 0),
+            "rows": list(summary.get("rows") or [])[:100],  # at most the first 100 rows are kept
+            "overwrite": bool(params.get("overwrite", False)),
+            "scenario_name_filter": str(params.get("scenario_name_filter") or "").strip(),
+            "selected_ids": list(params.get("selected_ids") or []),
+        }
+    elif task_type == "run_eval_dirs":
+        patch["evaluation"] = {
+            "directories_processed": summary.get("directories_processed", 0),
+            "success": summary.get("success", 0),
+            "failed": summary.get("failed", 0),
+            "skipped": summary.get("skipped", 0),
+            "summary_path": str(summary.get("summary_path") or "").strip(),
+            "summary_rows": summary.get("summary_rows", 0),
+            "score_rows": summary.get("score_rows", 0),
+            "enabled": True,
+            "recursive": bool(params.get("recursive", True)),  # note: defaults to True here, unlike "eval_recursive" elsewhere
+            "overwrite": bool(params.get("overwrite", False)),
+        }
+    elif task_type == "generate_summary_csv":
+        patch["evaluation"] = {
+            "summary_path": str(summary.get("summary_path") or "").strip(),
+            "summary_rows": summary.get("summary_rows", 0),
+            "score_rows": summary.get("score_rows", 0),
+            "enabled": True,
+        }
+    elif task_type == "build_parquet":
+        patch["parquet"] = {
+            "enabled": True,
+            "path": str(summary.get("output_path") or "").strip(),
+        }
+    elif task_type == "download_and_eval":
+        patch["download"] = {
+            "mode": "download_and_eval",
+            **_as_dict(summary.get("download_summary")),  # may override "mode"; the keys listed after it win over the summary
+            "download_type": str(params.get("download_type") or "").strip(),
+            "phase": str(params.get("phase") or "").strip(),
+            "skip_large_file": bool(params.get("skip_large_file", False)),
+            "large_file_mb": params.get("large_file_mb"),
+            "keep_zip_files": bool(params.get("keep_zip_files", False)),
+        }
+        patch["evaluation"] = {
+            **_as_dict(summary.get("eval_summary")),  # the explicit keys below take precedence
+            "enabled": bool(params.get("run_eval", False)),
+            "recursive": bool(params.get("eval_recursive", False)),
+            "overwrite": bool(params.get("eval_overwrite", False)),
+        }
+        patch["parquet"] = {
+            "enabled": bool(params.get("generate_parquet", False)),
+            "path": str(summary.get("parquet_path") or "").strip(),
+        }
+        errors = list(summary.get("errors") or [])
+        if errors:  # "errors" is only added when the summary reports at least one
+            patch["errors"] = errors
+    elif task_type == "run_evaluator_and_process":
+        patch["evaluator"] = {
+            "job_id": str(summary.get("evaluator_job_id") or params.get("job_id") or "").strip(),
+            "report_url": str(summary.get("evaluator_report_url") or "").strip(),
+            "status": str(summary.get("evaluator_status") or "").strip(),
+            "title": str(summary.get("evaluator_title") or params.get("description") or "").strip(),
+            "scheduled_by": str(summary.get("evaluator_scheduled_by") or "").strip(),
+            "build_status": str(summary.get("evaluator_build_status") or "").strip(),
+            "test_status": str(summary.get("evaluator_test_status") or "").strip(),
+            "fail_message": str(summary.get("evaluator_fail_message") or "").strip(),
+            "case_totals": _as_dict(summary.get("evaluator_case_totals")),
+            "suites": list(summary.get("evaluator_suites") or []),
+            "failed_cases": list(summary.get("evaluator_failed_cases") or []),
+            "catalog_id": str(params.get("catalog_id") or "").strip(),
+            "catalog_name": str(summary.get("evaluator_catalog_name") or "").strip(),
+            "catalog_version_id": str(summary.get("evaluator_catalog_version_id") or "").strip(),
+            "catalog_url": str(summary.get("evaluator_catalog_url") or "").strip(),
+            "integration_id": str(params.get("integration_id") or "").strip(),
+            "source_job_id": str(params.get("source_job_id") or "").strip(),
+            "target_name": str(params.get("target_name") or "").strip(),
+            "target": str(summary.get("evaluator_target") or params.get("target_name") or "").strip(),
+            "git_sha": str(summary.get("evaluator_git_sha") or "").strip(),
+            "git_ref_url": str(summary.get("evaluator_git_ref_url") or "").strip(),
+            "git_commit_url": str(summary.get("evaluator_git_commit_url") or "").strip(),
+            "source_url": str(summary.get("evaluator_source_url") or "").strip(),
+            "source_repo_label": str(summary.get("evaluator_source_repo_label") or "").strip(),
+            "description": str(params.get("description") or "").strip(),
+            "is_tag": bool(params.get("is_tag", False)),
+        }
+        patch["download"] = {
+            "mode": "run_evaluator_and_process",
+            **_as_dict(summary.get("download_summary")),  # may override "mode"; the keys listed after it win over the summary
+            "rows": list(summary.get("download_rows") or [])[:100],  # at most the first 100 rows are kept
+            "download_type": str(params.get("download_type") or "").strip(),
+            "phase": str(params.get("phase") or "").strip(),
+            "skip_large_file": bool(params.get("skip_large_file", False)),
+            "large_file_mb": params.get("large_file_mb"),
+            "keep_zip_files": bool(params.get("keep_zip_files", False)),
+        }
+        patch["evaluation"] = {
+            **_as_dict(summary.get("eval_summary")),  # the explicit keys below take precedence
+            "enabled": bool(params.get("run_eval", False)),
+            "recursive": bool(params.get("eval_recursive", False)),
+            "overwrite": bool(params.get("eval_overwrite", False)),
+        }
+        patch["parquet"] = {
+            "enabled": bool(params.get("generate_parquet", False)),
+            "path": str(summary.get("parquet_path") or "").strip(),
+        }
+
+    return patch
diff --git a/evaluation_dashboard_app/lib/score_schema.py b/evaluation_dashboard_app/lib/score_schema.py
new file mode 100644
index 0000000..5aef313
--- /dev/null
+++ b/evaluation_dashboard_app/lib/score_schema.py
@@ -0,0 +1,155 @@
+from __future__ import annotations
+
+from pathlib import Path
+
+import pandas as pd
+
+SCORE_BASE_COLS = ["Scenario", "Option", "GT_OBJ"]
+SCORE_BASE_COLS_WITH_DATASET = ["Scenario", "Dataset", "Option", "GT_OBJ"]
+
+SCORE_SOURCE_METRIC_COLS = [
+    "Distance",
+    "NM",
+    "TP/TN",
+    "ADD",
+    "AIL",
+    "UIL",
+    "PFN/PFP",
+    "UUID Num",
+    "Practical Pass Rate",
+    "MAX_DIST_THRESH",
+    "OBJ_CNTS",
+]
+
+SCORE_VIEW_METRIC_COLS = [
+    "distance",
+    "nm",
+    "tp_tn",
+    "add",
+    "ail",
+    "uil",
+    "pfn_pfp",
+    "uuid_num",
+    "pass_rate",
+    "max_dist_thresh",
+    "obj_cnts",
+]
+
+SCORE_NUM_COLS = [
+    "nm",
+    "tp_tn",
+    "add",
+    "ail",
+    "uil",
+    "pfn_pfp",
+    "uuid_num",
+    "pass_rate",
+    "max_dist_thresh",
+]
+
+SCORE_BLOCK_SIZE = len(SCORE_VIEW_METRIC_COLS)
+
+
+def _looks_like_header(row: pd.Series) -> bool:
+    """Return True when the row's first cell, stripped, is exactly "Scenario"."""
+    return bool(len(row)) and str(row.iloc[0]).strip() == "Scenario"
+
+
+def _looks_like_criteria_cell(value: object) -> bool:
+    """Return True when the stringified, stripped value begins with "criteria"."""
+    return str(value).strip().startswith("criteria")
+
+
+def _drop_extra_empty_trailing_columns(df: pd.DataFrame, base_count: int) -> pd.DataFrame:
+    """Strip all-NaN columns from the right edge while the metric width is not block-aligned."""
+    width = df.shape[1]
+    # Stop at the first edge column holding data, or once the columns past base_count
+    # divide evenly into SCORE_BLOCK_SIZE-wide blocks.
+    while width and df.iloc[:, width - 1].isna().all() and (width - base_count) % SCORE_BLOCK_SIZE:
+        width -= 1
+    return df if width == df.shape[1] else df.iloc[:, :width]
+
+
+def _infer_base_count(df: pd.DataFrame, header_row: pd.Series | None) -> int:
+    """Return the number of leading base columns: 4 when a Dataset column is present, else 3."""
+    if header_row is not None:
+        # An explicit header decides by itself: "Dataset" must be the second label.
+        labels = [str(cell).strip() for cell in header_row.tolist()]
+        return 4 if len(labels) >= 4 and labels[1] == "Dataset" else 3
+    if df.empty:
+        return 3
+
+    # Headerless data: a cell starting with "criteria" at index 4 or 3 of the first row
+    # is treated as the start of the metric columns (index 4 is checked first).
+    lead = df.iloc[0]
+    for position in (4, 3):
+        if len(lead) > position and _looks_like_criteria_cell(lead.iloc[position]):
+            return position
+
+    # Last resort: accept 4 base columns only if the remainder splits into whole blocks.
+    ncols = df.shape[1]
+    return 4 if ncols >= 4 and (ncols - 4) % SCORE_BLOCK_SIZE == 0 else 3
+
+
+def score_raw_columns(has_dataset: bool, criteria_count: int) -> list[str]:
+    """Build raw column names: base columns, then index-suffixed metric names per criteria."""
+    base = SCORE_BASE_COLS_WITH_DATASET if has_dataset else SCORE_BASE_COLS
+    suffixed = [f"{name}{i}" for i in range(criteria_count) for name in SCORE_SOURCE_METRIC_COLS]
+    return [*base, *suffixed]
+
+
+def read_score_csv(score_path: Path) -> pd.DataFrame | None:
+    """Load a score CSV (header optional) with schema names; None if the file is missing."""
+    if not score_path.exists():
+        return None
+    try:
+        raw = pd.read_csv(score_path, header=None, engine="python")
+    except pd.errors.EmptyDataError:  # zero-byte file: read_csv raises instead of returning empty
+        return pd.DataFrame()
+    if raw.empty:
+        return raw
+    header_row = raw.iloc[0] if _looks_like_header(raw.iloc[0]) else None
+    if header_row is not None:
+        raw = raw.iloc[1:].reset_index(drop=True)
+    base_count = _infer_base_count(raw, header_row)
+    raw = _drop_extra_empty_trailing_columns(raw, base_count)
+    criteria_count = max(1, (raw.shape[1] - base_count) // SCORE_BLOCK_SIZE)
+    raw = raw.reindex(columns=range(base_count + criteria_count * SCORE_BLOCK_SIZE))  # trim or NaN-pad
+    raw.columns = score_raw_columns(base_count == 4, criteria_count)
+    return raw.reset_index(drop=True)
+
+
+def score_base_cols(df_raw: pd.DataFrame) -> list[str]:
+    """Return a fresh list of base column names, including Dataset when df_raw has it."""
+    has_dataset = df_raw is not None and "Dataset" in df_raw.columns
+    return list(SCORE_BASE_COLS_WITH_DATASET if has_dataset else SCORE_BASE_COLS)
+
+
+def infer_score_criteria_count(
+    df_raw: pd.DataFrame,
+    max_criteria: int = 32,
+) -> int:
+    """Count whole metric blocks after the base columns: at least 1, then capped at max_criteria."""
+    if df_raw is None or df_raw.empty:
+        return 1
+    metric_width = df_raw.shape[1] - len(score_base_cols(df_raw))
+    block_count = max(1, metric_width // SCORE_BLOCK_SIZE)
+    return int(min(block_count, max_criteria))
+
+
+def build_score_view(df_raw: pd.DataFrame, criteria_idx: int) -> pd.DataFrame:
+    """Join the base columns with one criteria's metric block, renamed to the view names."""
+    base_cols = score_base_cols(df_raw)
+    base = df_raw.loc[:, base_cols].copy()
+    # Each criteria owns SCORE_BLOCK_SIZE consecutive columns right after the base columns.
+    offset = len(base_cols) + criteria_idx * SCORE_BLOCK_SIZE
+    metrics = df_raw.iloc[:, offset : offset + SCORE_BLOCK_SIZE].copy()
+    metrics.columns = SCORE_VIEW_METRIC_COLS
+    df_view = pd.concat([base, metrics], axis=1)
+    # Unparsable cells in the numeric metric columns become NaN.
+    df_view[SCORE_NUM_COLS] = df_view[SCORE_NUM_COLS].apply(pd.to_numeric, errors="coerce")
+    return df_view
+
+
+def score_identity_cols(df: pd.DataFrame | None) -> list[str]:  # None is accepted: yields ["Scenario"]
+    return ["Scenario", "Dataset"] if df is not None and "Dataset" in df.columns else ["Scenario"]
diff --git a/evaluation_dashboard_app/lib/specsheet_report.py b/evaluation_dashboard_app/lib/specsheet_report.py
new file mode 100644
index 0000000..992c8b0
--- /dev/null
+++ b/evaluation_dashboard_app/lib/specsheet_report.py
@@ -0,0 +1,1677 @@
+from __future__ import annotations
+
+from contextlib import contextmanager
+from dataclasses import dataclass
+import inspect
+import json
+import os
+import re
+import shutil
+from types import SimpleNamespace
+from pathlib import Path
+from typing import Any, Callable, Iterable, Sequence
+
+import pandas as pd
+import yaml
+
+from lib.path_utils import get_data_root
+from lib.run_metadata import read_run_metadata
+
+DEFAULT_SPECSHEET_TOPIC = "perception.object_recognition.tracking.objects"
+DEFAULT_TREND_TOPIC = "perception.object_recognition.objects"
+DETECTION_TREND_TOPIC_BY_MODEL = {
+    "bevfusion": "perception.object_recognition.detection.bevfusion.objects",
+    "centerpoint": "perception.object_recognition.detection.centerpoint.objects",
+}
+DEFAULT_SPECSHEET_PROJECT_ID = "x2_dev"
+DEFAULT_SPECSHEET_LABELS = ["car", "truck", "bus", "bicycle", "pedestrian", "motorcycle"]
+DEFAULT_SPECSHEET_METRICS = [
+    "mAP",
+    "precision",
+    "recall",
+    "FNR",
+    "max_consecutive_fn_duration",
+    "x_error",
+    "y_error",
+    "yaw_error",
+    "speed_error",
+]
+FUTURE_SPECSHEET_METRICS = [
+    "minADE@1s",
+    "minADE@3s",
+    "minADE@5s",
+    "minFDE@1s",
+    "minFDE@3s",
+    "minFDE@5s",
+]
+TREND_METADATA_FILENAME = "metadata.yaml"
+TREND_SUMMARY_FILENAME = "summary.json"
+SPECSHEET_RELEASE_ROLE_DIRS = ("performance", "usecase", "devops")
+GENERATED_TREND_HISTORY_DIRNAME = "_app_trend_history"
+FULL_DATASET_EVALUATION_HEADER = "全数データセット評価"
+DEFAULT_TREND_METADATA_TEXT = """tags: [trend]
+pilot_auto_version: "Pilot.Auto v4.3.0 (centerpoint x2/2.3.1)"
+data_count: 99,776+
+description: データの追加
+date: 2025.11.7
+"""
+_TREND_DATE_PATTERN = re.compile(r"^\d{4}\.\d{1,2}\.\d{1,2}$")
+_TREND_DATA_COUNT_PATTERN = re.compile(r"^\d[\d,]*\+?$")
+_PILOT_AUTO_PREFIX_PATTERN = re.compile(r"^Pilot\.Auto\s+", re.IGNORECASE)
+
+
+@dataclass
+class TrendReleaseGroup:
+    group_key: str  # unique key; discover_trend_release_groups uses "run::", "group::", "standalone_group::" prefixes
+    display_name: str  # label: a directory name, or "<release> | <date>" for merged standalone runs
+    topic_name: str  # topic directory name, metadata "topic_name", or the literal "standalone"
+    group_kind: str  # "standalone_run", "library_pdf_group" or "standalone_release_group"
+    base_dir: Path  # run dir, combined dir, or the data root for merged standalone groups
+    jobs: dict[str, dict[str, Any]]  # role -> record (role, job_id, metadata_path, summary_path, metadata, summary)
+
+
+def get_specsheet_artifact_paths(run_dir: str | Path) -> dict[str, Path]:
+    run_path = Path(run_dir)
+    return {
+        "run_dir": run_path,
+        "current_csv": run_path / "current.csv",
+        "future_csv": run_path / "future.csv",
+        "current_parquet": run_path / "current.parquet",
+        "future_parquet": run_path / "future.parquet",
+        "resource_dir": run_path / "resources",
+        "trend_metadata": run_path / "resources" / TREND_METADATA_FILENAME,
+        "trend_summary": run_path / "resources" / TREND_SUMMARY_FILENAME,
+        "specsheet_dir": run_path / "specsheet",
+        "specsheet_pdf": run_path / "specsheet" / "specsheet.pdf",
+    }
+
+
+def _topic_values_from_frame(frame: pd.DataFrame) -> list[str]:
+    """Return the sorted, stripped, non-blank values of the first usable topic column.
+
+    "topic_name" is tried before "topic"; a column with no non-blank values is skipped.
+    """
+    for column in ("topic_name", "topic"):
+        if column in frame.columns:
+            stripped = (str(value).strip() for value in frame[column].dropna().unique().tolist())
+            values = sorted(text for text in stripped if text)
+            if values:
+                return values
+    return []
+
+
+def detect_specsheet_topic_names(run_dir: str | Path, *, csv_sample_rows: int = 50000) -> list[str]:
+    """Detect topic names already present in specsheet CSV/parquet artifacts."""
+    paths = get_specsheet_artifact_paths(run_dir)
+    detected: set[str] = set()
+
+    for parquet_path in (paths["current_parquet"], paths["future_parquet"]):
+        if not parquet_path.exists():
+            continue
+        try:
+            import pyarrow.parquet as pq
+
+            columns = set(pq.ParquetFile(parquet_path).schema_arrow.names)
+        except Exception:
+            try:
+                columns = set(pd.read_parquet(parquet_path, columns=[]).columns)
+            except Exception:
+                columns = set()
+        topic_columns = [column for column in ("topic_name", "topic") if column in columns]
+        for column in topic_columns:
+            try:
+                frame = pd.read_parquet(parquet_path, columns=[column])
+            except Exception:
+                continue
+            detected.update(_topic_values_from_frame(frame))
+
+    for csv_path in (paths["current_csv"], paths["future_csv"]):
+        if not csv_path.exists():
+            continue
+        try:
+            header = pd.read_csv(csv_path, nrows=0)
+        except Exception:
+            continue
+        topic_columns = [column for column in ("topic_name", "topic") if column in header.columns]
+        for column in topic_columns:
+            try:
+                frame = pd.read_csv(csv_path, usecols=[column], nrows=csv_sample_rows)
+            except Exception:
+                continue
+            detected.update(_topic_values_from_frame(frame))
+
+    return sorted(detected)
+
+
+def resolve_specsheet_topic_name(
+    run_dir: str | Path,
+    requested_topic: str | None,
+    *,
+    fallback_topic: str = DEFAULT_SPECSHEET_TOPIC,
+) -> tuple[str, list[str]]:
+    """Resolve the topic that should be used for specsheet generation."""
+    requested = str(requested_topic or "").strip()
+    detected = detect_specsheet_topic_names(run_dir)
+    if requested and requested in detected:
+        return requested, detected
+    if fallback_topic in detected:
+        return fallback_topic, detected
+    if len(detected) == 1:
+        return detected[0], detected
+    return requested or fallback_topic, detected
+
+
+def _looks_like_specsheet_release_container(path: Path) -> bool:
+    return (
+        (path / TREND_METADATA_FILENAME).exists()
+        and any((path / role).is_dir() for role in SPECSHEET_RELEASE_ROLE_DIRS)
+    )
+
+
+def get_release_specsheet_context(run_dir: str | Path) -> dict[str, Any] | None:
+    """Return release-folder context for specsheet workflow output, if present."""
+    run_path = Path(run_dir)
+    if _looks_like_specsheet_release_container(run_path):
+        release_dir = run_path
+    elif run_path.name in SPECSHEET_RELEASE_ROLE_DIRS and _looks_like_specsheet_release_container(run_path.parent):
+        release_dir = run_path.parent
+    else:
+        return None
+
+    roles: dict[str, dict[str, Path | bool]] = {}
+    for role in SPECSHEET_RELEASE_ROLE_DIRS:
+        role_dir = release_dir / role
+        if not role_dir.is_dir():
+            continue
+        role_paths = get_specsheet_artifact_paths(role_dir)
+        roles[role] = {
+            "run_dir": role_dir,
+            "metadata": role_paths["trend_metadata"],
+            "summary": role_paths["trend_summary"],
+            "has_metadata": role_paths["trend_metadata"].exists(),
+            "has_summary": role_paths["trend_summary"].exists(),
+        }
+
+    metadata_path = release_dir / TREND_METADATA_FILENAME
+    if not metadata_path.exists():
+        performance_metadata = roles.get("performance", {}).get("metadata")
+        if isinstance(performance_metadata, Path) and performance_metadata.exists():
+            metadata_path = performance_metadata
+
+    return {
+        "release_dir": release_dir,
+        "metadata": metadata_path,
+        "roles": roles,
+        "performance_dir": roles.get("performance", {}).get("run_dir"),
+        "devops_dir": roles.get("devops", {}).get("run_dir"),
+    }
+
+
+def resolve_specsheet_generation_run_path(run_dir: str | Path) -> Path:
+    """Use the performance child as the PDF body for release workflow folders."""
+    run_path = Path(run_dir)
+    context = get_release_specsheet_context(run_path)
+    if context is None:
+        return run_path
+    performance_dir = context.get("performance_dir")
+    if isinstance(performance_dir, Path):
+        return performance_dir
+    return run_path
+
+
+def list_specsheet_source_parquets(run_dir: str | Path) -> list[Path]:
+    paths = get_specsheet_artifact_paths(run_dir)
+    run_path = paths["run_dir"]
+    ordered: list[Path] = []
+    seen: set[Path] = set()
+    for key in ("current_parquet", "future_parquet"):
+        path = paths[key]
+        if path.exists():
+            ordered.append(path)
+            seen.add(path)
+    for path in sorted(run_path.glob("*.parquet"), key=lambda p: p.name.lower()):
+        if path not in seen:
+            ordered.append(path)
+            seen.add(path)
+    return ordered
+
+
+def get_latest_source_mtime(run_dir: str | Path) -> float | None:
+    """Return the newest mtime among the run's source parquets, or None when none remain."""
+    candidates = list_specsheet_source_parquets(run_dir)
+    # default=None covers files that vanished after listing; a bare max() would raise ValueError.
+    return max((path.stat().st_mtime for path in candidates if path.exists()), default=None)
+
+
+def is_specsheet_pdf_fresh(run_dir: str | Path) -> bool:
+    paths = get_specsheet_artifact_paths(run_dir)
+    pdf_path = paths["specsheet_pdf"]
+    if not pdf_path.exists():
+        return False
+    latest_source_mtime = get_latest_source_mtime(run_dir)
+    if latest_source_mtime is None:
+        return True
+    return pdf_path.stat().st_mtime >= latest_source_mtime
+
+
+def _notify(progress_callback: Callable[[str], None] | None, message: str) -> None:
+    if progress_callback is not None:
+        progress_callback(message)
+
+
+@contextmanager
+def _patch_block_generation_progress(
+    progress_callback: Callable[[str], None] | None,
+):
+    if progress_callback is None:
+        yield
+        return
+
+    try:
+        from perception_catalog_analyzer.specsheet import blocks as specsheet_blocks
+    except ImportError:
+        yield
+        return
+
+    original_tqdm = specsheet_blocks.tqdm
+
+    class ProgressTqdm:
+        def __init__(self, iterable, desc: str | None = None, **kwargs):
+            self._items = list(iterable)
+            self._desc = desc or ""
+            self._current_index = 0
+
+        def __enter__(self):
+            return self
+
+        def __exit__(self, exc_type, exc, tb):
+            return False
+
+        def __iter__(self):
+            for idx, item in enumerate(self._items, start=1):
+                self._current_index = idx
+                yield item
+
+        def set_postfix_str(self, text: str) -> None:
+            total = len(self._items)
+            if total <= 0:
+                return
+            _notify(
+                progress_callback,
+                f"{self._desc} {self._current_index}/{total}: {text}",
+            )
+
+    specsheet_blocks.tqdm = ProgressTqdm
+    try:
+        yield
+    finally:
+        specsheet_blocks.tqdm = original_tqdm
+
+
+def _copy_parquet_to_csv(parquet_path: Path, csv_path: Path) -> Path:
+    """Rewrite the parquet file as an index-less CSV at csv_path and return csv_path."""
+    pd.read_parquet(parquet_path).to_csv(csv_path, index=False)
+    return csv_path
+
+
+def _prefer_cjk_font_stack(html_lines: Sequence[str]) -> list[str]:
+    """Swap the generic sans-serif declaration for a CJK-first font stack in every line."""
+    cjk_stack = (
+        'font-family: "Noto Sans CJK JP", "Noto Sans JP", '
+        '"IPAGothic", "IPA Gothic", sans-serif;'
+    )
+    # Lines without the exact generic declaration pass through unchanged.
+    return [text.replace("font-family: sans-serif;", cjk_stack) for text in html_lines]
+
+
+def parse_trend_metadata_text(text: str) -> dict[str, Any]:
+    """Parse and validate manual trend metadata YAML input."""
+    raw = yaml.safe_load(text or "")
+    if not isinstance(raw, dict):
+        raise ValueError("Trend metadata must be a YAML object with key/value pairs.")
+
+    tags = raw.get("tags")
+    if isinstance(tags, str):
+        tags = [tags]
+    if not isinstance(tags, list) or not any(str(tag).strip() == "trend" for tag in tags):
+        raise ValueError("Trend metadata must include `tags: [trend]`.")
+
+    pilot_auto_version = str(raw.get("pilot_auto_version") or "").strip()
+    if not pilot_auto_version:
+        raise ValueError("Trend metadata requires a non-empty `pilot_auto_version`.")
+
+    data_count = str(raw.get("data_count") or "").strip()
+    if not data_count or not _TREND_DATA_COUNT_PATTERN.match(data_count):
+        raise ValueError(
+            "Trend metadata `data_count` must look like `99,776+` or `12345`."
+        )
+
+    description = str(raw.get("description") or "").strip()
+    date = str(raw.get("date") or "").strip()
+    if not date or not _TREND_DATE_PATTERN.match(date):
+        raise ValueError("Trend metadata `date` must look like `2025.11.7`.")
+
+    parsed = {
+        "tags": ["trend"],
+        "pilot_auto_version": pilot_auto_version,
+        "data_count": data_count,
+        "description": description,
+        "date": date,
+    }
+    for optional_key in ("release_group", "topic_name", "version_abbr"):
+        optional_value = str(raw.get(optional_key) or "").strip()
+        if optional_value:
+            parsed[optional_key] = optional_value
+    return parsed
+
+
+def _trend_version_abbr(metadata: dict[str, Any]) -> str:
+    explicit = str(metadata.get("version_abbr") or "").strip()
+    if explicit:
+        return explicit
+    version = str(metadata.get("pilot_auto_version") or "").strip()
+    if not version:
+        return ""
+    try:
+        from perception_catalog_analyzer.trend import _abbreviate_version
+
+        abbreviated = str(_abbreviate_version(version) or "").strip()
+        if abbreviated:
+            return abbreviated
+    except Exception:
+        pass
+    shortened = _PILOT_AUTO_PREFIX_PATTERN.sub("", version).strip() or version
+    return shortened[:16]
+
+
+def _infer_trend_topic(metadata: dict[str, Any], metadata_path: str | Path) -> str:
+    explicit = str(metadata.get("topic_name") or "").strip()
+    if explicit and explicit != DEFAULT_SPECSHEET_TOPIC:
+        return explicit
+    for part in reversed(Path(metadata_path).parts):
+        if part.startswith("perception.") and part != DEFAULT_SPECSHEET_TOPIC:
+            return part
+    return DEFAULT_TREND_TOPIC
+
+
+def write_trend_metadata(run_dir: str | Path, metadata: dict[str, Any]) -> Path:
+    paths = get_specsheet_artifact_paths(run_dir)
+    resource_dir = paths["resource_dir"]
+    metadata_path = paths["trend_metadata"]
+    resource_dir.mkdir(parents=True, exist_ok=True)
+    with metadata_path.open("w", encoding="utf-8") as fh:
+        yaml.safe_dump(metadata, fh, allow_unicode=True, sort_keys=False)
+    return metadata_path
+
+
+def discover_trend_metadata_files(root_dir: str | Path | None = None) -> list[Path]:
+    """Return sorted, resolved metadata.yaml paths under the root that sit beside a summary.json.
+
+    Generated-history and ``release_spec_*`` folders are skipped. The filters look only at the
+    path below the root, so a data root that itself lives inside such a folder is still scanned.
+    """
+    base_dir = Path(root_dir) if root_dir is not None else get_data_root()
+    if not base_dir.exists():
+        return []
+    matches: set[Path] = set()
+    for metadata_path in base_dir.rglob(TREND_METADATA_FILENAME):
+        below_root = metadata_path.relative_to(base_dir).parts
+        if GENERATED_TREND_HISTORY_DIRNAME in below_root or any(p.startswith("release_spec_") for p in below_root):
+            continue
+        if metadata_path.is_file() and (metadata_path.parent / TREND_SUMMARY_FILENAME).exists():
+            matches.add(metadata_path.resolve())
+    return sorted(matches, key=str)
+
+
+def load_trend_metadata_file(metadata_path: str | Path) -> dict[str, Any]:
+    """Read a trend metadata YAML mapping; an empty or falsy document yields {}."""
+    data = yaml.safe_load(Path(metadata_path).read_text(encoding="utf-8")) or {}
+    if not isinstance(data, dict):
+        raise ValueError(f"Invalid trend metadata file: {metadata_path}")
+    return data
+
+
+def load_trend_summary_file(summary_path: str | Path) -> dict[str, Any]:
+    """Read a trend summary JSON file; raise ValueError unless it holds a JSON object."""
+    data = json.loads(Path(summary_path).read_text(encoding="utf-8"))
+    if not isinstance(data, dict):
+        raise ValueError(f"Invalid trend summary file: {summary_path}")
+    return data
+
+
+def classify_trend_summary(summary: dict[str, Any]) -> str:
+    """Classify a summary as "full", "usecase", "performance_blocks", "devops" or "unknown"."""
+    blocks = summary.get("blocks") if isinstance(summary, dict) else None
+    if isinstance(blocks, list):
+        # Ignore malformed (non-dict) entries rather than failing on .get().
+        headers = {str(block.get("header") or "") for block in blocks if isinstance(block, dict)}
+        if "全数データセット評価" in headers:
+            return "full"
+        if "ユースケース評価" in headers:
+            return "usecase"
+        return "performance_blocks"
+    return "devops" if isinstance(summary, dict) and summary else "unknown"
+
+
+def _unwrap_devops_summary(summary: dict[str, Any]) -> dict[str, Any]:
+    """Return the nested "DevOps" mapping when summary has one, else summary unchanged."""
+    if isinstance(summary, dict) and isinstance(summary.get("DevOps"), dict):
+        return summary["DevOps"]
+    return summary
+
+
+def _release_role_key_for_metadata(role: str) -> str:
+    """Map the "full" and "performance_blocks" roles to "performance"; pass others through."""
+    performance_roles = {"full", "performance_blocks"}
+    return "performance" if role in performance_roles else role
+
+
+def _job_id_from_run_metadata(run_dir: Path, role: str) -> str:
+    role_key = _release_role_key_for_metadata(role)
+    candidates = [run_dir]
+    if run_dir.parent != run_dir:
+        candidates.append(run_dir.parent)
+
+    for candidate in candidates:
+        metadata = read_run_metadata(candidate)
+        release_specsheet = metadata.get("release_specsheet") if isinstance(metadata.get("release_specsheet"), dict) else {}
+        evaluator_jobs = release_specsheet.get("evaluator_jobs") if isinstance(release_specsheet.get("evaluator_jobs"), dict) else {}
+        role_meta = evaluator_jobs.get(role_key) if isinstance(evaluator_jobs.get(role_key), dict) else {}
+        job_id = str(role_meta.get("job_id") or "").strip()
+        if job_id:
+            return job_id
+
+        evaluator_meta = metadata.get("evaluator") if isinstance(metadata.get("evaluator"), dict) else {}
+        job_id = str(evaluator_meta.get("job_id") or "").strip()
+        if job_id:
+            return job_id
+
+        request_meta = metadata.get("request") if isinstance(metadata.get("request"), dict) else {}
+        parameter_meta = request_meta.get("parameters") if isinstance(request_meta.get("parameters"), dict) else {}
+        for key in (f"{role_key}_job_id", "job_id"):
+            job_id = str(parameter_meta.get(key) or request_meta.get(key) or "").strip()
+            if job_id:
+                return job_id
+    return ""
+
+
+def _release_metadata_match(candidate: dict[str, Any], target: dict[str, Any]) -> bool:
+    """Return True when every non-empty identifying field of target equals candidate's."""
+    keys = ("release_group", "pilot_auto_version", "topic_name", "description", "data_count")
+    wanted = {key: str(target.get(key) or "").strip() for key in keys}
+    # Empty target fields act as wildcards; values are compared as stripped strings.
+    return all(str(candidate.get(key) or "").strip() == value for key, value in wanted.items() if value)
+
+
+def _job_id_from_matching_release_run_metadata(root_dir: str | Path | None, target_metadata: dict[str, Any], role: str) -> str:
+    root = Path(root_dir) if root_dir is not None else get_data_root()
+    if not root.exists() or not root.is_dir():
+        return ""
+    role_key = _release_role_key_for_metadata(role)
+    candidates = sorted(
+        [path for path in root.iterdir() if path.is_dir()],
+        key=lambda path: path.stat().st_mtime if path.exists() else 0,
+        reverse=True,
+    )
+    for candidate in candidates:
+        metadata = read_run_metadata(candidate)
+        request_meta = metadata.get("request") if isinstance(metadata.get("request"), dict) else {}
+        parameter_meta = request_meta.get("parameters") if isinstance(request_meta.get("parameters"), dict) else {}
+        trend_metadata = (
+            parameter_meta.get("trend_metadata")
+            if isinstance(parameter_meta.get("trend_metadata"), dict)
+            else {}
+        )
+        release_specsheet = metadata.get("release_specsheet") if isinstance(metadata.get("release_specsheet"), dict) else {}
+        release_metadata = (
+            release_specsheet.get("metadata")
+            if isinstance(release_specsheet.get("metadata"), dict)
+            else trend_metadata
+        )
+        if not _release_metadata_match(release_metadata, target_metadata):
+            continue
+
+        evaluator_jobs = release_specsheet.get("evaluator_jobs") if isinstance(release_specsheet.get("evaluator_jobs"), dict) else {}
+        role_meta = evaluator_jobs.get(role_key) if isinstance(evaluator_jobs.get(role_key), dict) else {}
+        job_id = str(role_meta.get("job_id") or "").strip()
+        if job_id:
+            return job_id
+
+        job_id = str(parameter_meta.get(f"{role_key}_job_id") or request_meta.get(f"{role_key}_job_id") or "").strip()
+        if job_id:
+            return job_id
+    return ""
+
+
+def discover_trend_release_groups(root_dir: str | Path | None = None) -> list[TrendReleaseGroup]:
+    metadata_files = discover_trend_metadata_files(root_dir)
+    grouped: dict[str, TrendReleaseGroup] = {}
+    standalone_records: list[dict[str, Any]] = []
+
+    for metadata_path in metadata_files:
+        summary_path = metadata_path.parent / TREND_SUMMARY_FILENAME
+        summary = load_trend_summary_file(summary_path)
+        role = classify_trend_summary(summary)
+        metadata = load_trend_metadata_file(metadata_path)
+
+        if metadata_path.parent.name == "resources":
+            run_dir = metadata_path.parent.parent
+            group_key = f"run::{run_dir.resolve()}"
+            display_name = run_dir.name
+            topic_name = str(metadata.get("topic_name") or "standalone")
+            group_kind = "standalone_run"
+            base_dir = run_dir
+            standalone_records.append(
+                {
+                    "group_key": group_key,
+                    "display_name": display_name,
+                    "topic_name": topic_name,
+                    "group_kind": group_kind,
+                    "base_dir": base_dir,
+                    "role": role,
+                    "job_id": str(
+                        metadata.get("job_id")
+                        or _job_id_from_run_metadata(run_dir, role)
+                        or _job_id_from_matching_release_run_metadata(root_dir, metadata, role)
+                        or ""
+                    ),
+                    "metadata_path": metadata_path,
+                    "summary_path": summary_path,
+                    "metadata": metadata,
+                    "summary": summary,
+                }
+            )
+            continue
+        else:
+            job_dir = metadata_path.parent
+            topic_dir = job_dir.parent
+            combined_dir = topic_dir.parent
+            group_key = f"group::{combined_dir.resolve()}::{topic_dir.name}"
+            display_name = combined_dir.name
+            topic_name = topic_dir.name
+            group_kind = "library_pdf_group"
+            base_dir = combined_dir
+
+        if group_key not in grouped:
+            grouped[group_key] = TrendReleaseGroup(
+                group_key=group_key,
+                display_name=display_name,
+                topic_name=topic_name,
+                group_kind=group_kind,
+                base_dir=base_dir,
+                jobs={},
+            )
+        grouped[group_key].jobs[role] = {
+            "role": role,
+            "job_id": str(
+                metadata.get("job_id")
+                or _job_id_from_run_metadata(metadata_path.parent, role)
+                or _job_id_from_matching_release_run_metadata(root_dir, metadata, role)
+                or (metadata_path.parent.name if metadata_path.parent.name != "resources" else run_dir.name)
+            ),
+            "metadata_path": metadata_path.resolve(),
+            "summary_path": summary_path.resolve(),
+            "metadata": metadata,
+            "summary": summary,
+        }
+
+    standalone_by_release: dict[tuple[str, str, str, str, str, str], list[dict[str, Any]]] = {}
+    for record in standalone_records:
+        metadata = record["metadata"]
+        release_key = (
+            str(metadata.get("release_group") or ""),
+            str(record["topic_name"] or ""),
+            str(metadata.get("pilot_auto_version") or ""),
+            str(metadata.get("date") or ""),
+            str(metadata.get("description") or ""),
+            str(metadata.get("data_count") or ""),
+        )
+        standalone_by_release.setdefault(release_key, []).append(record)
+
+    for release_key, records in standalone_by_release.items():
+        role_counts: dict[str, int] = {}
+        for record in records:
+            role = str(record["role"])
+            role_counts[role] = role_counts.get(role, 0) + 1
+
+        can_group = len(records) > 1 and all(count == 1 for count in role_counts.values())
+        if can_group:
+            sample = records[0]
+            metadata = sample["metadata"]
+            release_label = (
+                str(metadata.get("release_group") or "").strip()
+                or str(metadata.get("pilot_auto_version") or "").strip()
+                or "standalone_release"
+            )
+            date_label = str(metadata.get("date") or "").strip()
+            display_name = f"{release_label} | {date_label}" if date_label else release_label
+            group_key = "standalone_group::" + "::".join(release_key)
+            grouped[group_key] = TrendReleaseGroup(
+                group_key=group_key,
+                display_name=display_name,
+                topic_name=str(sample["topic_name"]),
+                group_kind="standalone_release_group",
+                base_dir=Path(root_dir) if root_dir is not None else get_data_root(),
+                jobs={},
+            )
+            target_group = grouped[group_key]
+            for record in records:
+                target_group.jobs[str(record["role"])] = {
+                    "role": record["role"],
+                    "job_id": record["job_id"],
+                    "metadata_path": record["metadata_path"].resolve(),
+                    "summary_path": record["summary_path"].resolve(),
+                    "metadata": record["metadata"],
+                    "summary": record["summary"],
+                }
+            continue
+
+        for record in records:
+            group_key = str(record["group_key"])
+            grouped[group_key] = TrendReleaseGroup(
+                group_key=group_key,
+                display_name=str(record["display_name"]),
+                topic_name=str(record["topic_name"]),
+                group_kind=str(record["group_kind"]),
+                base_dir=record["base_dir"],
+                jobs={
+                    str(record["role"]): {
+                        "role": record["role"],
+                        "job_id": record["job_id"],
+                        "metadata_path": record["metadata_path"].resolve(),
+                        "summary_path": record["summary_path"].resolve(),
+                        "metadata": record["metadata"],
+                        "summary": record["summary"],
+                    }
+                },
+            )
+
+    def _sort_key(group: TrendReleaseGroup) -> tuple[tuple[int, ...], str]:
+        # Dates are unpadded "YYYY.M.D" text ("2025.11.7"); order by their numbers, not characters.
+        dates = [
+            tuple(int(part) for part in re.findall(r"\d+", str(job["metadata"].get("date") or "")))
+            for job in group.jobs.values()
+            if isinstance(job.get("metadata"), dict)
+        ]
+        return (max(dates, default=()), group.display_name)
+
+    return sorted(_deduplicate_trend_release_groups(grouped.values()), key=_sort_key)
+
+
+def _trend_group_identity(group: TrendReleaseGroup) -> tuple[str, str, str, str, str, str, tuple[str, ...]]:
+    """Build the key used to recognise duplicate trend release groups.
+
+    Metadata is read from the first role present in ``group.jobs``, checked in
+    the order ``full``, ``usecase``, ``devops``, ``performance_blocks``,
+    ``unknown``. A missing or non-dict ``metadata`` entry is treated as empty
+    metadata instead of raising ``AttributeError``.
+
+    Returns:
+        ``(release_group, topic_name, pilot_auto_version, date, description,
+        data_count, sorted role names)``. Every scalar is rendered as a string
+        and absent or falsy values become ``""``.
+    """
+    metadata = {}
+    for role in ("full", "usecase", "devops", "performance_blocks", "unknown"):
+        if role in group.jobs:
+            candidate = group.jobs[role].get("metadata")
+            if isinstance(candidate, dict):
+                metadata = candidate
+            # Only the first matching role is consulted, even when it is unusable.
+            break
+    return (
+        str(metadata.get("release_group") or ""),
+        str(group.topic_name or ""),
+        str(metadata.get("pilot_auto_version") or ""),
+        str(metadata.get("date") or ""),
+        str(metadata.get("description") or ""),
+        str(metadata.get("data_count") or ""),
+        tuple(sorted(group.jobs.keys())),
+    )
+
+
+def _trend_group_preference(group: TrendReleaseGroup) -> tuple[int, int, str]:
+    """Rank a group for duplicate resolution; the larger tuple is preferred.
+
+    The rank is ``(history_rank, job_count, base_dir)`` where ``history_rank``
+    is 0 when any job's metadata path has ``GENERATED_TREND_HISTORY_DIRNAME``
+    among its path components and 1 otherwise.
+    """
+    from_generated_history = False
+    for job in group.jobs.values():
+        metadata_parts = Path(job.get("metadata_path", "")).parts
+        if GENERATED_TREND_HISTORY_DIRNAME in metadata_parts:
+            from_generated_history = True
+            break
+    history_rank = 0 if from_generated_history else 1
+    return (history_rank, len(group.jobs), str(group.base_dir))
+
+
+def _deduplicate_trend_release_groups(groups: Iterable[TrendReleaseGroup]) -> list[TrendReleaseGroup]:
+    """Keep one group per identity, choosing the one with the highest preference.
+
+    Identities come from ``_trend_group_identity`` and ranks from
+    ``_trend_group_preference``. On a tie the group seen first is kept. The
+    result follows the order in which each identity first appeared.
+    """
+    best_by_identity: dict[tuple[str, str, str, str, str, str, tuple[str, ...]], TrendReleaseGroup] = {}
+    for candidate in groups:
+        key = _trend_group_identity(candidate)
+        incumbent = best_by_identity.get(key)
+        if incumbent is not None and not (
+            _trend_group_preference(candidate) > _trend_group_preference(incumbent)
+        ):
+            continue
+        best_by_identity[key] = candidate
+    return list(best_by_identity.values())
+
+
+def _trend_version_sort_key(pilot_auto_version: str) -> tuple[tuple[int, int, int], str, tuple[int, int, int]]:
+    """Turn a version label into a sortable ``(release, model_type, model_version)`` key.
+
+    The label is expected to look like ``v<major>.<minor>.<patch> (<type>
+    <name>/<x>.<y>.<z>)``. Labels that do not match sort last via the
+    ``(999, 999, 999)`` sentinel and keep their raw text as the middle element.
+    A model part that is not ``<name>/<x>.<y>.<z>`` yields the sentinel as the
+    model version only.
+    """
+    unknown = (999, 999, 999)
+    label = str(pilot_auto_version or "")
+    found = re.search(r"v(\d+)\.(\d+)\.(\d+)\s*\(([^ ]+)\s+(.+)\)", label)
+    if found is None:
+        return (unknown, label, unknown)
+
+    release = (int(found.group(1)), int(found.group(2)), int(found.group(3)))
+    model_type, model_info = found.group(4), found.group(5)
+    try:
+        # Both unpackings raise ValueError when the part count is unexpected.
+        _, model_version = model_info.split("/")
+        first, second, third = model_version.split(".")
+        ml_version = (int(first), int(second), int(third))
+    except ValueError:
+        ml_version = unknown
+    return (release, model_type, ml_version)
+
+
+def _canonical_summary_table_key(table_data: dict[str, Any]) -> str:
+    """Serialize a summary table to a JSON string usable as an equality key."""
+    # Sorted keys make the text independent of dict insertion order.
+    serialized = json.dumps(
+        table_data,
+        sort_keys=True,
+        allow_nan=True,
+        ensure_ascii=False,
+    )
+    return serialized
+
+
+def _deduplicate_summary_tables(data_list: Sequence[dict[str, Any]]) -> list[dict[str, Any]]:
+    """Drop tables whose canonical JSON form was already seen, keeping the first."""
+    first_by_key: dict[str, dict[str, Any]] = {}
+    for table_data in data_list:
+        # setdefault leaves an earlier table with the same key untouched, and
+        # dict insertion order preserves the first-seen ordering.
+        first_by_key.setdefault(_canonical_summary_table_key(table_data), table_data)
+    return list(first_by_key.values())
+
+
+def _extract_full_metric_tables(summary: dict[str, Any]) -> list[dict[str, Any]]:
+    """Collect the distinct metric tables of the full-dataset evaluation blocks.
+
+    A block qualifies when it is a dict whose ``header`` equals
+    ``FULL_DATASET_EVALUATION_HEADER``, whose ``mode`` is absent or
+    ``"metrics"``, and whose ``evaluation_type`` is absent or ``"full"``. From
+    each qualifying block, every non-empty dict found under
+    ``tables[*]["data"]`` is taken. Malformed entries are ignored silently.
+    """
+    blocks = summary.get("blocks", [])
+    if not isinstance(blocks, list):
+        return []
+
+    collected: list[dict[str, Any]] = []
+    for block in blocks:
+        is_full_metrics_block = (
+            isinstance(block, dict)
+            and block.get("header") == FULL_DATASET_EVALUATION_HEADER
+            and block.get("mode") in (None, "metrics")
+            and block.get("evaluation_type") in (None, "full")
+        )
+        if not is_full_metrics_block:
+            continue
+        entries = block.get("tables", [])
+        if not isinstance(entries, list):
+            continue
+        for entry in entries:
+            if not isinstance(entry, dict):
+                continue
+            payload = entry.get("data", {})
+            if isinstance(payload, dict) and payload:
+                collected.append(payload)
+    return _deduplicate_summary_tables(collected)
+
+
+def _load_only_full_summary(summary_path: Path) -> list[dict[str, Any]]:
+    """Read a summary file and return its distinct full-evaluation metric tables."""
+    return _extract_full_metric_tables(load_trend_summary_file(summary_path))
+
+
+def ensure_full_trend_summary(summary_path: str | Path) -> Path:
+    """Validate that analyzer block generation produced a full trend summary.
+
+    Returns:
+        ``summary_path`` as a ``Path`` once all checks pass.
+
+    Raises:
+        FileNotFoundError: If the summary file does not exist.
+        ValueError: If the summary is not classified as ``full``, or if metric
+            extraction rejects it.
+    """
+    path = Path(summary_path)
+    if path.exists():
+        summary = load_trend_summary_file(path)
+        role = classify_trend_summary(summary)
+        if role == "full":
+            # Called for validation only; the averaged metrics are discarded.
+            extract_performance_metrics_from_summary(summary)
+            return path
+        raise ValueError(f"Expected a full trend summary at {path}, but it classified as `{role}`.")
+    raise FileNotFoundError(
+        f"Full trend summary was not created: {path}. "
+        "The analyzer must write resources/summary.json before trend PDF generation."
+    )
+
+
+def extract_performance_metrics_from_summary(summary: dict[str, Any]) -> dict[str, float]:
+    """Return averaged full-performance metrics from a full summary payload.
+
+    Each reported metric is the mean of the values stored under that metric
+    name in the single full metric table. Values that cannot be parsed as
+    numbers are coerced to NaN by pandas before averaging. A metric that is
+    missing, empty or not a dict yields ``nan``.
+
+    Raises:
+        ValueError: If the summary does not contain exactly one distinct full
+            metric table.
+    """
+    tables = _extract_full_metric_tables(summary)
+    if len(tables) != 1:
+        raise ValueError(f"Expected exactly one distinct full summary table, but got {len(tables)}")
+    (table,) = tables
+
+    def _mean_of(metric_name: str) -> float:
+        per_key = table.get(metric_name, {})
+        if isinstance(per_key, dict) and per_key:
+            series = pd.Series(list(per_key.values()))
+            return float(pd.to_numeric(series, errors="coerce").mean())
+        return float("nan")
+
+    metric_names = (
+        "mAP",
+        "precision",
+        "recall",
+        "FNR",
+        "x_error",
+        "y_error",
+        "yaw_error",
+        "speed_error",
+        "minADE@1s",
+        "minFDE@1s",
+        "minADE@3s",
+        "minFDE@3s",
+        "minADE@5s",
+        "minFDE@5s",
+    )
+    return {name: _mean_of(name) for name in metric_names}
+
+
+def extract_devops_case_rows(summary: dict[str, Any]) -> list[dict[str, Any]]:
+    """Flatten nested devops/pass-rate summary into case rows.
+
+    The unwrapped summary is walked as ``major -> mid -> minor``. A minor node
+    that carries a ``passed`` or ``total`` key is itself one case; otherwise
+    each dict-valued child of the node is a case. Non-dict nodes are skipped at
+    every level.
+
+    Returns:
+        Rows with ``major_category``, ``mid_category``, ``minor_category``,
+        ``case_name``, ``passed``, ``total`` and ``pass_rate`` (a percentage,
+        or ``None`` when ``total`` is not positive).
+    """
+    flattened: list[dict[str, Any]] = []
+    for major_name, mid_nodes in _unwrap_devops_summary(summary).items():
+        if not isinstance(mid_nodes, dict):
+            continue
+        for mid_name, minor_nodes in mid_nodes.items():
+            if not isinstance(minor_nodes, dict):
+                continue
+            for minor_name, node in minor_nodes.items():
+                if not isinstance(node, dict):
+                    continue
+                if "passed" in node or "total" in node:
+                    # The node is a leaf result: the case is named after it.
+                    leaves = [(minor_name, node)]
+                else:
+                    leaves = [
+                        (child_name, child)
+                        for child_name, child in node.items()
+                        if isinstance(child, dict)
+                    ]
+
+                for case_name, result in leaves:
+                    passed = int(result.get("passed", 0) or 0)
+                    total = int(result.get("total", 0) or 0)
+                    pass_rate = (passed / total * 100.0) if total > 0 else None
+                    flattened.append(
+                        {
+                            "major_category": major_name,
+                            "mid_category": mid_name,
+                            "minor_category": minor_name,
+                            "case_name": case_name,
+                            "passed": passed,
+                            "total": total,
+                            "pass_rate": pass_rate,
+                        }
+                    )
+    return flattened
+
+
+def _normalize_devops_summary_structure(summary: dict[str, Any]) -> dict[str, dict[str, dict[str, dict[str, int]]]]:
+    """Reshape a devops summary into ``major -> mid -> case -> {passed, total}``.
+
+    All keys are converted to ``str`` and counts to ``int``, with missing or
+    falsy counts becoming 0. A mid node that itself carries ``passed`` or
+    ``total`` becomes a single case named after the mid category. Non-dict
+    nodes are skipped.
+    """
+
+    def _counts(result: dict[str, Any]) -> dict[str, int]:
+        return {
+            "passed": int(result.get("passed", 0) or 0),
+            "total": int(result.get("total", 0) or 0),
+        }
+
+    normalized: dict[str, dict[str, dict[str, dict[str, int]]]] = {}
+    for major_name, mid_nodes in _unwrap_devops_summary(summary).items():
+        if not isinstance(mid_nodes, dict):
+            continue
+        major_bucket = normalized.setdefault(str(major_name), {})
+        for mid_name, node in mid_nodes.items():
+            if not isinstance(node, dict):
+                continue
+            mid_bucket = major_bucket.setdefault(str(mid_name), {})
+            if "passed" in node or "total" in node:
+                mid_bucket[str(mid_name)] = _counts(node)
+            else:
+                for case_name, result in node.items():
+                    if isinstance(result, dict):
+                        mid_bucket[str(case_name)] = _counts(result)
+    return normalized
+
+
+def _align_devops_trend_data_structures(trend_data_rows: list[dict[str, Any]]) -> list[dict[str, Any]]:
+    """Give every trend row the same ``major -> mid -> case`` layout.
+
+    Each row's ``devops_data`` is replaced in place by its normalized form.
+    Every case seen in any row is then added to the rows that lack it, as
+    ``{"passed": 0, "total": 0}``.
+
+    Case names are tracked in first-seen order rather than in a ``set``, so the
+    order in which padded cases are inserted does not depend on string hash
+    randomization and is stable between runs.
+
+    Returns:
+        The same list object, with its row dicts mutated.
+    """
+    # dicts are used as insertion-ordered sets of case names.
+    structure: dict[str, dict[str, dict[str, None]]] = {}
+    for row in trend_data_rows:
+        devops_data = _normalize_devops_summary_structure(row.get("devops_data", {}))
+        row["devops_data"] = devops_data
+        for major_category, mid_categories in devops_data.items():
+            major_structure = structure.setdefault(major_category, {})
+            for mid_category, cases in mid_categories.items():
+                major_structure.setdefault(mid_category, {}).update(dict.fromkeys(cases))
+
+    for row in trend_data_rows:
+        devops_data = row.get("devops_data", {})
+        if not isinstance(devops_data, dict):
+            devops_data = {}
+            row["devops_data"] = devops_data
+        for major_category, mid_categories in structure.items():
+            row_major = devops_data.setdefault(major_category, {})
+            for mid_category, cases in mid_categories.items():
+                row_mid = row_major.setdefault(mid_category, {})
+                for case_name in cases:
+                    row_mid.setdefault(case_name, {"passed": 0, "total": 0})
+    return trend_data_rows
+
+
+def load_performance_trend_data(metadata_list: Sequence[Path]) -> list[dict[str, str | int | float]]:
+    """Load averaged full-evaluation metrics for every trend-tagged history entry.
+
+    Each metadata file is paired with the file named ``TREND_SUMMARY_FILENAME``
+    in the same directory. An entry is skipped when it lacks the ``trend`` tag,
+    has no summary file, or its summary has no full metric table. A ``tags``
+    value that is missing or null counts as "no tags". The remaining entries
+    are ordered by ``_trend_version_sort_key`` of their ``pilot_auto_version``.
+
+    Returns:
+        One dict per entry with the descriptive metadata fields plus the mean
+        of each reported metric (``nan`` when a metric is missing or empty).
+
+    Raises:
+        ValueError: If an entry does not hold exactly one distinct full metric
+            table.
+    """
+
+    def _avg(metrics: dict[str, Any], metric_name: str) -> float:
+        values = metrics.get(metric_name, {})
+        if not isinstance(values, dict) or not values:
+            return float("nan")
+        numeric = pd.to_numeric(pd.Series(list(values.values())), errors="coerce")
+        return float(numeric.mean())
+
+    trend_data_rows: list[dict[str, Any]] = []
+    for metadata_path in metadata_list:
+        metadata = load_trend_metadata_file(metadata_path)
+        # `or []` also covers an explicit null, which .get's default does not.
+        tags = metadata.get("tags") or []
+        if "trend" not in [str(tag).strip() for tag in tags]:
+            continue
+        summary_path = Path(metadata_path).parent / TREND_SUMMARY_FILENAME
+        if not summary_path.exists():
+            continue
+        summary_list = _load_only_full_summary(summary_path)
+        if not summary_list:
+            continue
+        trend_data_rows.append(
+            {
+                "version": metadata.get("pilot_auto_version"),
+                "version_abbr": _trend_version_abbr(metadata),
+                "data_count": metadata.get("data_count"),
+                "description": metadata.get("description"),
+                "date": metadata.get("date"),
+                "topic": _infer_trend_topic(metadata, metadata_path),
+                "summary": summary_list,
+            }
+        )
+
+    trend_data_rows.sort(key=lambda row: _trend_version_sort_key(str(row.get("version") or "")))
+
+    output: list[dict[str, str | int | float]] = []
+    for row in trend_data_rows:
+        summary = row.get("summary") or []
+        if len(summary) != 1:
+            raise ValueError(
+                f"Expected exactly one distinct summary block for version {row.get('version')}, "
+                f"but got {len(summary)}"
+            )
+        metrics = summary[0]
+        output.append(
+            {
+                "version": row.get("version"),
+                "version_abbr": row.get("version_abbr"),
+                "data_count": row.get("data_count"),
+                "description": row.get("description"),
+                "date": row.get("date"),
+                "topic": row.get("topic"),
+                "mAP": _avg(metrics, "mAP"),
+                "precision": _avg(metrics, "precision"),
+                "recall": _avg(metrics, "recall"),
+                "minADE@1s": _avg(metrics, "minADE@1s"),
+                "minFDE@1s": _avg(metrics, "minFDE@1s"),
+                "minADE@3s": _avg(metrics, "minADE@3s"),
+                "minFDE@3s": _avg(metrics, "minFDE@3s"),
+                "minADE@5s": _avg(metrics, "minADE@5s"),
+                "minFDE@5s": _avg(metrics, "minFDE@5s"),
+            }
+        )
+    return output
+
+
+def load_devops_trend_data(metadata_list: Sequence[Path]) -> list[dict[str, Any]]:
+    """Load pass-rate history rows for every trend-tagged devops summary.
+
+    Each metadata file is paired with the file named ``TREND_SUMMARY_FILENAME``
+    in the same directory. An entry is skipped when it lacks the ``trend`` tag,
+    has no summary file, is not classified as ``devops``, or yields no case
+    rows. A ``tags`` value that is missing or null counts as "no tags".
+
+    Returns:
+        Rows ordered by ``_trend_version_sort_key`` of their version. Each row
+        holds the descriptive metadata fields, ``overall_pass_rate`` (a
+        percentage, 0.0 when there are no scenarios), ``scenario_count`` and
+        the normalized ``devops_data``, aligned across rows by
+        ``_align_devops_trend_data_structures``.
+    """
+    trend_data_rows: list[dict[str, Any]] = []
+    for metadata_path in metadata_list:
+        metadata = load_trend_metadata_file(metadata_path)
+        # `or []` also covers an explicit null, which .get's default does not.
+        tags = metadata.get("tags") or []
+        if "trend" not in [str(tag).strip() for tag in tags]:
+            continue
+        summary_path = Path(metadata_path).parent / TREND_SUMMARY_FILENAME
+        if not summary_path.exists():
+            continue
+        summary = load_trend_summary_file(summary_path)
+        if classify_trend_summary(summary) != "devops":
+            continue
+
+        rows = extract_devops_case_rows(summary)
+        if not rows:
+            continue
+        normalized_summary = _normalize_devops_summary_structure(summary)
+        overall_passed = sum(int(row["passed"]) for row in rows)
+        overall_total = sum(int(row["total"]) for row in rows)
+        trend_data_rows.append(
+            {
+                "version": metadata.get("pilot_auto_version"),
+                "version_abbr": _trend_version_abbr(metadata),
+                "data_count": metadata.get("data_count"),
+                "description": metadata.get("description"),
+                "date": metadata.get("date"),
+                "topic": _infer_trend_topic(metadata, metadata_path),
+                "overall_pass_rate": (overall_passed / overall_total * 100.0)
+                if overall_total > 0
+                else 0.0,
+                "scenario_count": overall_total,
+                "devops_data": normalized_summary,
+            }
+        )
+
+    trend_data_rows.sort(key=lambda row: _trend_version_sort_key(str(row.get("version") or "")))
+    return _align_devops_trend_data_structures(trend_data_rows)
+
+
+def _add_devops_detail_trend_rates(devops_trend_data: Sequence[dict[str, Any]]) -> list[str]:
+    """Store a pass rate per mid category on each row and list those categories.
+
+    For every dict-valued mid category under a row's ``devops_data``, the
+    ``passed`` and ``total`` counts of its dict-valued cases are summed and the
+    percentage is written to ``row[<mid category>]`` (0.0 when the total is not
+    positive). The rows are mutated in place.
+
+    Returns:
+        The sorted names of all mid categories that were written.
+    """
+    seen_categories: set[str] = set()
+    for row in devops_trend_data:
+        devops_data = row.get("devops_data", {})
+        if not isinstance(devops_data, dict):
+            continue
+        for mid_nodes in devops_data.values():
+            if not isinstance(mid_nodes, dict):
+                continue
+            for mid_name, case_nodes in mid_nodes.items():
+                if not isinstance(case_nodes, dict):
+                    continue
+                results = [node for node in case_nodes.values() if isinstance(node, dict)]
+                passed_sum = sum(int(node.get("passed", 0) or 0) for node in results)
+                total_sum = sum(int(node.get("total", 0) or 0) for node in results)
+                if total_sum > 0:
+                    row[mid_name] = passed_sum / total_sum * 100.0
+                else:
+                    row[mid_name] = 0.0
+                seen_categories.add(str(mid_name))
+    return sorted(seen_categories)
+
+
+def _devops_trend_rows_for_template(devops_trend_data: Sequence[dict[str, Any]]) -> list[dict[str, Any]]:
+    """Return shallow copies of the rows with ``version`` shown as its abbreviation.
+
+    ``version`` is replaced only when ``version_abbr`` is non-empty after
+    stripping whitespace. The input rows are left untouched.
+    """
+
+    def _for_display(row: dict[str, Any]) -> dict[str, Any]:
+        shown = dict(row)
+        abbreviation = str(shown.get("version_abbr") or "").strip()
+        if abbreviation:
+            shown["version"] = abbreviation
+        return shown
+
+    return [_for_display(row) for row in devops_trend_data]
+
+
+def _build_trend_context(
+    metadata_list: Sequence[Path],
+    output_dir: Path,
+    current_devops_summary_path: Path | None = None,
+    progress_callback: Callable[[str], None] | None = None,
+) -> dict[str, object]:
+    """Collect trend history, render the trend plots and return the template context.
+
+    Args:
+        metadata_list: Metadata files of the history entries to include.
+        output_dir: Directory that receives the PNG plots; created if missing.
+        current_devops_summary_path: Optional summary of the current run, used
+            for the current pass-rate plot when it classifies as ``devops``.
+        progress_callback: Optional receiver of progress messages.
+
+    Returns:
+        A dict with the keys ``performance_trend_data``, ``map_trend_plot_path``,
+        ``prediction_trend_plot_path``, ``devops_data``, ``devops_plot_path``,
+        ``devops_trend_data``, ``devops_trend_plot_path`` and ``job_ids``.
+        Plot paths are returned even when the matching plot was not rendered;
+        ``devops_plot_path`` is ``None`` unless the current plot was drawn, and
+        ``job_ids`` is always an empty list.
+
+    Raises:
+        RuntimeError: If the analyzer plot modules cannot be imported.
+    """
+    # Without history, return placeholders before importing the analyzer or
+    # touching the filesystem.
+    if not metadata_list:
+        return {
+            "performance_trend_data": [],
+            "map_trend_plot_path": output_dir / "map_trend.png",
+            "prediction_trend_plot_path": output_dir / "prediction_trend.png",
+            "devops_data": {},
+            "devops_plot_path": None,
+            "devops_trend_data": [],
+            "devops_trend_plot_path": output_dir / "devops_trend.png",
+            "job_ids": [],
+        }
+
+    try:
+        from perception_catalog_analyzer.plot.map_trend import generate_map_trend_plot
+        from perception_catalog_analyzer.plot.prediction_trend import generate_prediction_trend_plot
+        from perception_catalog_analyzer.plot.devops_trend import (
+            generate_devops_trend_detail_plot,
+            generate_devops_trend_plot,
+        )
+        from perception_catalog_analyzer.plot.devops import generate_devops_plot
+    except ImportError as exc:
+        raise RuntimeError(
+            "perception_catalog_analyzer trend support is unavailable. "
+            f"Original error: {exc!s}"
+        ) from exc
+
+    output_dir.mkdir(parents=True, exist_ok=True)
+    _notify(progress_callback, "Collecting trend history")
+    performance_trend_data = load_performance_trend_data(list(metadata_list))
+    map_trend_plot_path = output_dir / "map_trend.png"
+    prediction_trend_plot_path = output_dir / "prediction_trend.png"
+    # The performance plots are rendered only when history rows were loaded.
+    if performance_trend_data:
+        _notify(progress_callback, "Rendering trend plots")
+        generate_map_trend_plot(performance_trend_data, map_trend_plot_path)
+        generate_prediction_trend_plot(performance_trend_data, prediction_trend_plot_path)
+
+    devops_trend_data = load_devops_trend_data(list(metadata_list))
+    devops_trend_plot_path = output_dir / "devops_trend.png"
+    devops_data = {}
+    devops_plot_path = None
+    # The current-run plot needs an existing summary that classifies as devops
+    # and normalizes to a non-empty structure.
+    if current_devops_summary_path is not None and current_devops_summary_path.exists():
+        current_devops_summary = load_trend_summary_file(current_devops_summary_path)
+        if classify_trend_summary(current_devops_summary) == "devops":
+            devops_data = _normalize_devops_summary_structure(current_devops_summary)
+            if devops_data:
+                _notify(progress_callback, "Rendering current pass-rate plot")
+                devops_plot_path = output_dir / "devops.png"
+                generate_devops_plot(devops_data, devops_plot_path)
+    if devops_trend_data:
+        _notify(progress_callback, "Rendering pass-rate trend plots")
+        generate_devops_trend_plot(devops_trend_data, devops_trend_plot_path)
+        # Adds one pass-rate entry per mid category to each row, in place.
+        detail_cases = _add_devops_detail_trend_rates(devops_trend_data)
+        if detail_cases:
+            # NOTE(review): the detail plot is given the same path as the
+            # overview plot above; presumably the analyzer derives a separate
+            # output file from it. Confirm it does not overwrite the overview.
+            generate_devops_trend_detail_plot(
+                devops_trend_data,
+                detail_cases,
+                devops_trend_plot_path,
+            )
+
+    return {
+        "performance_trend_data": performance_trend_data,
+        "map_trend_plot_path": map_trend_plot_path,
+        "prediction_trend_plot_path": prediction_trend_plot_path,
+        "devops_data": devops_data,
+        "devops_plot_path": devops_plot_path,
+        # Rows for the template show the abbreviated version label.
+        "devops_trend_data": _devops_trend_rows_for_template(devops_trend_data),
+        "devops_trend_plot_path": devops_trend_plot_path,
+        "job_ids": [],
+    }
+
+
+def _update_template_compat(
+    update_template_func: Callable[..., Sequence[str]],
+    project_id: str,
+    version: str,
+    *,
+    template_dir: Path,
+    context_dir: Path,
+    trend_context: dict[str, object] | None = None,
+) -> Sequence[str]:
+    """Call update_template across analyzer versions with different signatures.
+
+    A pool of candidate arguments is built under every parameter name this
+    module knows about. If the target accepts ``**kwargs``, or its signature
+    cannot be inspected, the whole pool is passed by keyword. Otherwise only
+    the parameters the target declares are supplied.
+
+    Declared parameters are bound by name. Only positional-only parameters are
+    passed positionally, and only while they form an unbroken prefix. Binding
+    every positional-or-keyword parameter by position would shift later
+    arguments onto the wrong parameter whenever the target declares a
+    parameter that is not in the pool.
+
+    The call runs inside ``_patch_template_dataset_paths`` for ``context_dir``.
+
+    Returns:
+        Whatever ``update_template_func`` returns.
+    """
+    try:
+        parameters = inspect.signature(update_template_func).parameters
+    except (TypeError, ValueError):
+        parameters = {}
+
+    trend_context = trend_context or {}
+    path_manager = SimpleNamespace(specsheet_path=context_dir)
+    semantic_kwargs = {
+        "project_id": project_id,
+        "pilot_auto_version": version,
+        "version": version,
+        "devops_data": trend_context.get("devops_data", {}),
+        "devops_plot_path": trend_context.get("devops_plot_path"),
+        "performance_trend_data": trend_context.get("performance_trend_data", []),
+        "map_trend_plot_path": trend_context.get("map_trend_plot_path", context_dir / "map_trend.png"),
+        "prediction_trend_plot_path": trend_context.get(
+            "prediction_trend_plot_path", context_dir / "prediction_trend.png"
+        ),
+        "devops_trend_data": trend_context.get("devops_trend_data", []),
+        "devops_trend_plot_path": trend_context.get(
+            "devops_trend_plot_path", context_dir / "devops_trend.png"
+        ),
+        "job_ids": trend_context.get("job_ids", []),
+        "template_name": "static_body.html",
+        "extensions": ["html"],
+        "template_dir": str(template_dir),
+        "path_manager": path_manager,
+        "show_other_infos": bool(trend_context.get("performance_trend_data")),
+    }
+
+    accepts_kwargs = any(
+        param.kind == inspect.Parameter.VAR_KEYWORD for param in parameters.values()
+    )
+    if accepts_kwargs or not parameters:
+        with _patch_template_dataset_paths(update_template_func, context_dir):
+            return update_template_func(**semantic_kwargs)
+
+    args: list[object] = []
+    kwargs: dict[str, object] = {}
+    positional_prefix_open = True
+    for name, param in parameters.items():
+        known = name in semantic_kwargs
+        if param.kind == inspect.Parameter.POSITIONAL_ONLY:
+            if known and positional_prefix_open:
+                args.append(semantic_kwargs[name])
+            else:
+                # A gap ends the prefix: later positional-only slots cannot
+                # be filled without misaligning the arguments.
+                positional_prefix_open = False
+        elif known and param.kind in (
+            inspect.Parameter.POSITIONAL_OR_KEYWORD,
+            inspect.Parameter.KEYWORD_ONLY,
+        ):
+            kwargs[name] = semantic_kwargs[name]
+    with _patch_template_dataset_paths(update_template_func, context_dir):
+        return update_template_func(*args, **kwargs)
+
+
+@contextmanager
+def _patch_template_dataset_paths(
+    update_template_func: Callable[..., Sequence[str]],
+    context_dir: Path,
+):
+    """Redirect analyzer dataset-summary outputs away from read-only package config.
+
+    While the context is active, each of ``DATASET_SUMMARY_PATH``,
+    ``DATASET_TRAIN_PATH`` and ``DATASET_TEST_PATH`` found in the function's
+    ``__globals__`` that is an existing ``Path`` points at a copy under
+    ``context_dir / "dataset_assets"``. A copy already present there is reused.
+    The original values are put back on exit, including after an exception.
+    When none of the names exist, nothing is created or changed.
+    """
+    module_globals = getattr(update_template_func, "__globals__", {})
+    saved = {
+        name: module_globals.get(name)
+        for name in ("DATASET_SUMMARY_PATH", "DATASET_TRAIN_PATH", "DATASET_TEST_PATH")
+        if name in module_globals
+    }
+    if saved:
+        assets_dir = context_dir / "dataset_assets"
+        assets_dir.mkdir(parents=True, exist_ok=True)
+        try:
+            for name, source in saved.items():
+                if isinstance(source, Path) and source.exists():
+                    redirected = assets_dir / source.name
+                    if not redirected.exists():
+                        shutil.copy2(source, redirected)
+                    module_globals[name] = redirected
+            yield
+        finally:
+            module_globals.update(saved)
+    else:
+        yield
+
+def _scene_dataframe_from_dir_compat(
+    scene_dataframe_cls,
+    run_path: Path,
+    *,
+    topic_name: str,
+):
+    """Call SceneDataFrame.from_dir across analyzer versions with/without topic.
+
+    The topic is forwarded whenever ``from_dir`` declares a parameter named
+    ``topic_name`` or ``topic``. It is passed by keyword unless the parameter
+    is positional-only, so an optional or keyword-only topic parameter also
+    receives it. If neither name is declared, the topic is passed as the second
+    positional argument when the signature takes ``*args``/``**kwargs`` or has
+    at least two required parameters. Otherwise only ``run_path`` is passed.
+
+    Returns:
+        Whatever ``from_dir`` returns.
+    """
+    from_dir = scene_dataframe_cls.from_dir
+    try:
+        parameters = inspect.signature(from_dir).parameters
+    except (TypeError, ValueError):
+        parameters = {}
+
+    for candidate in ("topic_name", "topic"):
+        topic_param = parameters.get(candidate)
+        if topic_param is None:
+            continue
+        if topic_param.kind == inspect.Parameter.POSITIONAL_ONLY:
+            return from_dir(run_path, topic_name)
+        if topic_param.kind in (
+            inspect.Parameter.POSITIONAL_OR_KEYWORD,
+            inspect.Parameter.KEYWORD_ONLY,
+        ):
+            return from_dir(run_path, **{candidate: topic_name})
+
+    # No recognisable topic parameter: fall back to the arity heuristic.
+    required_parameters = [
+        param
+        for param in parameters.values()
+        if param.kind in (
+            inspect.Parameter.POSITIONAL_ONLY,
+            inspect.Parameter.POSITIONAL_OR_KEYWORD,
+            inspect.Parameter.KEYWORD_ONLY,
+        )
+        and param.default is inspect.Parameter.empty
+    ]
+    accepts_varargs = any(
+        param.kind in (inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD)
+        for param in parameters.values()
+    )
+
+    if accepts_varargs or len(required_parameters) >= 2:
+        return from_dir(run_path, topic_name)
+    return from_dir(run_path)
+
+
+# Columns that `_coerce_specsheet_scene_numeric_columns` converts to numeric
+# values in the scene object's `current` frame.
+_CURRENT_NUMERIC_COLUMNS = {
+    "unix_time",
+    "x",
+    "y",
+    "confidence",
+    "pointcloud_num",
+    "visibility",
+    "x_error",
+    "y_error",
+    "yaw_error",
+    "speed_error",
+    "frame_index",
+}
+# Columns converted the same way in the scene object's `future` frame. For a
+# plain DataFrame the union of both sets is used.
+_FUTURE_NUMERIC_COLUMNS = {
+    "x",
+    "y",
+    "tx",
+    "ty",
+    "confidence",
+    "visibility",
+    "relative_time",
+    "pair_dt_sec",
+}
+
+
+def _coerce_numeric_columns(frame: pd.DataFrame, columns: set[str]) -> pd.DataFrame:
+    """Return a copy of ``frame`` with the listed columns parsed as numbers.
+
+    Only columns present in the frame are touched. Values that cannot be
+    parsed become NaN. An empty frame is returned as is, without copying.
+    """
+    if not frame.empty:
+        result = frame.copy()
+        numeric_targets = sorted(columns.intersection(result.columns))
+        for name in numeric_targets:
+            result[name] = pd.to_numeric(result[name], errors="coerce")
+        return result
+    return frame
+
+
+def _coerce_specsheet_scene_numeric_columns(df):
+    """Normalize analyzer-loaded CSV values before NumPy-heavy specsheet metrics.
+
+    An object exposing ``current`` has that frame, and its ``future`` frame
+    when not ``None``, replaced by numerically coerced copies; the same object
+    is returned. A plain ``DataFrame`` is coerced over the union of both column
+    sets and a new frame is returned. Anything else is returned unchanged.
+    """
+    if not hasattr(df, "current"):
+        if isinstance(df, pd.DataFrame):
+            all_numeric_columns = _CURRENT_NUMERIC_COLUMNS | _FUTURE_NUMERIC_COLUMNS
+            return _coerce_numeric_columns(df, all_numeric_columns)
+        return df
+
+    df.current = _coerce_numeric_columns(df.current, _CURRENT_NUMERIC_COLUMNS)
+    has_future = getattr(df, "future", None) is not None
+    if has_future:
+        df.future = _coerce_numeric_columns(df.future, _FUTURE_NUMERIC_COLUMNS)
+    return df
+
+
+def _get_blocks_compat(
+    get_blocks_func: Callable[..., tuple[Sequence[str], Sequence[str]]],
+    *,
+    df,
+    labels: Sequence[str],
+    metrics: Sequence[str],
+    topic_name: str,
+    outdir: Path,
+    evaluation_type: str,
+):
+    """Call get_blocks across analyzer versions with different keyword support.
+
+    A pool of candidate arguments is built under every parameter name this
+    module knows about. If the target accepts ``**kwargs``, or its signature
+    cannot be inspected, the whole pool is passed by keyword. Otherwise only
+    the parameters the target declares are supplied.
+
+    Declared parameters are bound by name. Only positional-only parameters are
+    passed positionally, and only while they form an unbroken prefix. Binding
+    every positional-or-keyword parameter by position would shift later
+    arguments onto the wrong parameter whenever the target declares a
+    parameter that is not in the pool.
+
+    Returns:
+        Whatever ``get_blocks_func`` returns.
+    """
+    parquet_compression = "snappy"
+    try:
+        from perception_catalog_analyzer.types import ParquetCompression
+
+        parquet_compression = ParquetCompression.SNAPPY
+    except Exception:
+        # Best effort: analyzer versions without this enum keep the plain
+        # string set above.
+        pass
+
+    semantic_kwargs = {
+        "df": df,
+        "labels": list(labels),
+        "metrics": list(metrics),
+        "resource_path": outdir,
+        "html_path": outdir.parent if outdir.name == "resources" else outdir,
+        "parquet_compression": parquet_compression,
+        "topic_name": topic_name,
+        "topic": topic_name,
+        "path": outdir,
+        "outdir": outdir,
+        "evaluation_type": evaluation_type,
+    }
+    try:
+        parameters = inspect.signature(get_blocks_func).parameters
+    except (TypeError, ValueError):
+        parameters = {}
+
+    accepts_kwargs = any(
+        param.kind == inspect.Parameter.VAR_KEYWORD for param in parameters.values()
+    )
+    if accepts_kwargs or not parameters:
+        return get_blocks_func(**semantic_kwargs)
+
+    args: list[object] = []
+    kwargs: dict[str, object] = {}
+    positional_prefix_open = True
+    for name, param in parameters.items():
+        known = name in semantic_kwargs
+        if param.kind == inspect.Parameter.POSITIONAL_ONLY:
+            if known and positional_prefix_open:
+                args.append(semantic_kwargs[name])
+            else:
+                # A gap ends the prefix: later positional-only slots cannot
+                # be filled without misaligning the arguments.
+                positional_prefix_open = False
+        elif known and param.kind in (
+            inspect.Parameter.POSITIONAL_OR_KEYWORD,
+            inspect.Parameter.KEYWORD_ONLY,
+        ):
+            kwargs[name] = semantic_kwargs[name]
+    return get_blocks_func(*args, **kwargs)
+
+
+def _specsheet_compat(
+    specsheet_func: Callable[..., None],
+    *,
+    html: Sequence[str],
+    abstract_html: Sequence[str],
+    detailed_html: Sequence[str],
+    outdir: Path,
+    report_name: str,
+) -> None:
+    """Call specsheet across analyzer versions with path/outdir differences.
+
+    A pool of candidate arguments is built under every parameter name this
+    module knows about. If the target accepts ``**kwargs``, or its signature
+    cannot be inspected, the whole pool is passed by keyword. Otherwise only
+    the parameters the target declares are supplied.
+
+    Declared parameters are bound by name. Only positional-only parameters are
+    passed positionally, and only while they form an unbroken prefix. Binding
+    every positional-or-keyword parameter by position would shift later
+    arguments onto the wrong parameter whenever the target declares a
+    parameter that is not in the pool.
+    """
+    path_manager = SimpleNamespace(specsheet_path=outdir)
+    semantic_kwargs = {
+        "html": list(html),
+        "abstract_html": list(abstract_html),
+        "detailed_html": list(detailed_html),
+        "path_manager": path_manager,
+        "path": outdir,
+        "outdir": outdir,
+        "report_name": report_name,
+    }
+    try:
+        parameters = inspect.signature(specsheet_func).parameters
+    except (TypeError, ValueError):
+        parameters = {}
+
+    accepts_kwargs = any(
+        param.kind == inspect.Parameter.VAR_KEYWORD for param in parameters.values()
+    )
+    if accepts_kwargs or not parameters:
+        specsheet_func(**semantic_kwargs)
+        return
+
+    args: list[object] = []
+    kwargs: dict[str, object] = {}
+    positional_prefix_open = True
+    for name, param in parameters.items():
+        known = name in semantic_kwargs
+        if param.kind == inspect.Parameter.POSITIONAL_ONLY:
+            if known and positional_prefix_open:
+                args.append(semantic_kwargs[name])
+            else:
+                # A gap ends the prefix: later positional-only slots cannot
+                # be filled without misaligning the arguments.
+                positional_prefix_open = False
+        elif known and param.kind in (
+            inspect.Parameter.POSITIONAL_OR_KEYWORD,
+            inspect.Parameter.KEYWORD_ONLY,
+        ):
+            kwargs[name] = semantic_kwargs[name]
+    specsheet_func(*args, **kwargs)
+
+
+def ensure_specsheet_csvs(
+    run_dir: str | Path,
+    *,
+    progress_callback: Callable[[str], None] | None = None,
+) -> dict[str, Path | None]:
+    """Make sure the spec-sheet CSV inputs exist for a run directory.
+
+    When the current CSV is missing it is produced from the first available
+    source, in this order: the current parquet file, the first entry returned
+    by ``list_specsheet_source_parquets``, or the pkl / pkl.z files of the run.
+    The future CSV is produced only from the future parquet file, when that
+    file exists.
+
+    Returns:
+        ``{"current_csv": ..., "future_csv": ...}`` where each value is the
+        CSV path if the file exists afterwards and ``None`` otherwise.
+
+    Raises:
+        FileNotFoundError: If the pkl fallback ran but did not create the
+            current CSV.
+    """
+    paths = get_specsheet_artifact_paths(run_dir)
+    current_csv = paths["current_csv"]
+    future_csv = paths["future_csv"]
+    current_parquet = paths["current_parquet"]
+    future_parquet = paths["future_parquet"]
+
+    if not current_csv.exists():
+        if current_parquet.exists():
+            _notify(progress_callback, f"Converting {current_parquet.name} -> {current_csv.name}")
+            _copy_parquet_to_csv(current_parquet, current_csv)
+        else:
+            # List once: a second scan is wasted work, and indexing a fresh
+            # listing could fail if the directory changed in between.
+            source_parquets = list_specsheet_source_parquets(run_dir)
+            if source_parquets:
+                fallback = source_parquets[0]
+                _notify(progress_callback, f"Converting {fallback.name} -> {current_csv.name}")
+                _copy_parquet_to_csv(fallback, current_csv)
+            else:
+                _build_specsheet_csv_from_pkls(run_dir, current_csv, progress_callback)
+
+    if not future_csv.exists() and future_parquet.exists():
+        _notify(progress_callback, f"Converting {future_parquet.name} -> {future_csv.name}")
+        _copy_parquet_to_csv(future_parquet, future_csv)
+
+    return {
+        "current_csv": current_csv if current_csv.exists() else None,
+        "future_csv": future_csv if future_csv.exists() else None,
+    }
+
+
+def _build_specsheet_csv_from_pkls(
+    run_dir: str | Path,
+    current_csv: Path,
+    progress_callback: Callable[[str], None] | None,
+) -> None:
+    """Build the current CSV from the run's pkl files and report skipped files."""
+    _notify(progress_callback, "No CSV found. Building CSV from pkl / pkl.z files")
+    from lib.perception_catalog_io import build_scene_dataframe_from_pkl_dir
+
+    skip_counts: dict[str, int] = {}
+
+    def _on_progress(done: int, total: int) -> None:
+        _notify(progress_callback, f"Processing pkl files {done}/{total}")
+
+    def _on_skip(path: str | Path, reason: str) -> None:
+        # Only the reason is tallied; the individual path is not reported.
+        skip_counts[reason] = skip_counts.get(reason, 0) + 1
+
+    df = build_scene_dataframe_from_pkl_dir(
+        run_dir,
+        on_progress=_on_progress,
+        on_skip=_on_skip,
+    )
+    if skip_counts:
+        details = ", ".join(
+            f"{count} {reason}" for reason, count in sorted(skip_counts.items())
+        )
+        _notify(progress_callback, f"Skipped pkl files: {details}")
+    # NOTE(review): the run directory, not a file path, is passed here;
+    # presumably the scene dataframe writes its CSV files inside it. Confirm.
+    df.to_csv(run_dir)
+    if not current_csv.exists():
+        raise FileNotFoundError(f"Failed to generate {current_csv}")
+
+
+def generate_specsheet_pdf(
+    run_dir: str | Path,
+    *,
+    project_id: str,
+    version: str,
+    labels: Sequence[str],
+    topic_name: str = DEFAULT_SPECSHEET_TOPIC,
+    include_trend: bool = False,
+    trend_metadata: dict[str, Any] | None = None,
+    force: bool = False,
+    progress_callback: Callable[[str], None] | None = None,
+) -> tuple[Path, bool]:
+    paths = get_specsheet_artifact_paths(run_dir)
+    specsheet_dir = paths["specsheet_dir"]
+    pdf_path = paths["specsheet_pdf"]
+
+    if not force and is_specsheet_pdf_fresh(run_dir):
+        _notify(progress_callback, "Using existing up-to-date spec-sheet PDF")
+        return pdf_path, False
+
+    ensure_specsheet_csvs(run_dir, progress_callback=progress_callback)
+    resolved_topic, detected_topics = resolve_specsheet_topic_name(run_dir, topic_name)
+    if resolved_topic != topic_name:
+        detected_text = ", ".join(detected_topics) if detected_topics else "none"
+        _notify(
+            progress_callback,
+            f"Using detected topic {resolved_topic} instead of requested topic {topic_name} (detected: {detected_text})",
+        )
+        topic_name = resolved_topic
+
+    try:
+        from perception_catalog_analyzer.dataframe import SceneDataFrame
+        from perception_catalog_analyzer.specsheet import get_blocks, specsheet
+        from perception_catalog_analyzer import template as template_module
+        from perception_catalog_analyzer.template import update_template
+    except ImportError as exc:
+        raise RuntimeError(
+            "perception_catalog_analyzer spec-sheet generation is unavailable. "
+            f"Install the dependency first. Original error: {exc!s}"
+        ) from exc
+
+    run_path = paths["run_dir"]
+    resource_dir = run_path / "resources"
+    resource_dir.mkdir(parents=True, exist_ok=True)
+    specsheet_dir.mkdir(parents=True, exist_ok=True)
+    block_resource_dir = specsheet_dir / "resources"
+    block_resource_dir.mkdir(parents=True, exist_ok=True)
+    trend_asset_dir = specsheet_dir / "trend_assets"
+    trend_asset_dir.mkdir(parents=True, exist_ok=True)
+
+    _notify(progress_callback, "Loading CSV files")
+    df = _scene_dataframe_from_dir_compat(
+        SceneDataFrame,
+        run_path,
+        topic_name=topic_name,
+    )
+    df = _coerce_specsheet_scene_numeric_columns(df)
+    metrics = list(DEFAULT_SPECSHEET_METRICS)
+    if getattr(df, "future", None) is not None:
+        metrics.extend(FUTURE_SPECSHEET_METRICS)
+
+    _notify(progress_callback, "Building abstract and detail sections")
+    with _patch_block_generation_progress(progress_callback):
+        abstract, detailed = _get_blocks_compat(
+            get_blocks,
+            df=df,
+            labels=list(labels),
+            metrics=metrics,
+            topic_name=topic_name,
+            outdir=block_resource_dir.resolve(),
+            evaluation_type="full",
+        )
+
+    trend_context: dict[str, object] | None = None
+    if include_trend:
+        if trend_metadata is None:
+            raise ValueError("Trend metadata is required when trend mode is enabled.")
+        _notify(progress_callback, "Validating full trend summary")
+        generated_trend_summary = block_resource_dir / TREND_SUMMARY_FILENAME
+        trend_summary_path = generated_trend_summary if generated_trend_summary.exists() else paths["trend_summary"]
+        ensure_full_trend_summary(trend_summary_path)
+        if generated_trend_summary.exists() and not paths["trend_summary"].exists():
+            shutil.copy2(generated_trend_summary, paths["trend_summary"])
+        _notify(progress_callback, "Saving trend metadata")
+        write_trend_metadata(run_path, trend_metadata)
+        metadata_list = discover_trend_metadata_files()
+        release_context = get_release_specsheet_context(run_path)
+        current_devops_summary_path = None
+        if release_context is not None:
+            roles = release_context.get("roles", {})
+            if isinstance(roles, dict):
+                devops_info = roles.get("devops", {})
+                if isinstance(devops_info, dict):
+                    summary_path = devops_info.get("summary")
+                    if isinstance(summary_path, Path):
+                        current_devops_summary_path = summary_path
+        trend_context = _build_trend_context(
+            metadata_list,
+            trend_asset_dir,
+            current_devops_summary_path=current_devops_summary_path,
+            progress_callback=progress_callback,
+        )
+
+    _notify(progress_callback, "Rendering PDF")
+    for stale_output in (specsheet_dir / "specsheet.html", pdf_path):
+        if stale_output.exists() and not os.access(stale_output, os.W_OK):
+            stale_output.unlink()
+    template_dir = Path(template_module.__file__).resolve().parent.parent / "template"
+    html = _prefer_cjk_font_stack(
+        _update_template_compat(
+            update_template,
+            project_id,
+            version,
+            template_dir=template_dir,
+            context_dir=specsheet_dir,
+            trend_context=trend_context,
+        )
+    )
+    _specsheet_compat(
+        specsheet,
+        html=html,
+        abstract_html=abstract,
+        detailed_html=detailed,
+        outdir=specsheet_dir,
+        report_name="specsheet",
+    )
+    if not pdf_path.exists():
+        raise FileNotFoundError(f"Spec-sheet PDF was not created: {pdf_path}")
+    _notify(progress_callback, "Spec-sheet PDF is ready")
+    return pdf_path, True
+
+
+def collect_candidate_specsheet_labels(
+    run_dir: str | Path,
+    *,
+    preferred: Iterable[str] | None = None,
+) -> list[str]:
+    """Return sorted labels: the non-blank *preferred* values if any, else the ``label`` column found on disk."""
+    preferred_labels = [str(v) for v in (preferred or []) if str(v).strip()]
+    if preferred_labels:
+        return sorted(dict.fromkeys(preferred_labels))
+
+    paths = get_specsheet_artifact_paths(run_dir)
+    for source in (paths["current_csv"], paths["current_parquet"]):
+        if not source.exists():
+            continue
+        try:
+            if source.suffix == ".csv":
+                # Only the label column is needed, so skip parsing every other CSV column.
+                frame = pd.read_csv(source, usecols=lambda name: name == "label")
+            else:
+                frame = pd.read_parquet(source, columns=["label"])
+            if "label" not in frame.columns:
+                continue
+            labels = [str(v) for v in frame["label"].dropna().unique() if str(v).strip()]
+            if labels:
+                return sorted(labels)
+        except Exception:
+            # Best effort: an unreadable source falls through to the next candidate.
+            continue
+    return []
+
+
+_PROGRESS_FRACTION_PATTERN = re.compile(r"(?P<done>\d+)\s*/\s*(?P<total>\d+)")
+
+
+def progress_fraction_from_message(message: str) -> float | None:
+    """Return the first ``done/total`` pair in *message* as a fraction clamped to [0, 1], else ``None``."""
+    found = _PROGRESS_FRACTION_PATTERN.search(message or "")
+    if found is None:
+        return None
+    completed, expected = (int(found.group(part)) for part in ("done", "total"))
+    if expected <= 0:
+        return None
+    return min(1.0, max(0.0, completed / expected))
diff --git a/evaluation_dashboard_app/lib/summary_compare.py b/evaluation_dashboard_app/lib/summary_compare.py
index bb7272c..e152409 100644
--- a/evaluation_dashboard_app/lib/summary_compare.py
+++ b/evaluation_dashboard_app/lib/summary_compare.py
@@ -21,3 +21,56 @@ def build_summary_delta(df_a: pd.DataFrame, df_b: pd.DataFrame) -> pd.DataFrame:
         result[f"{m}_B"] = df_b.loc[common_idx, m]
         result[f"{m}_delta"] = df_b.loc[common_idx, m] - df_a.loc[common_idx, m]
     return result.reset_index()
+
+
+def summary_delta_overlap_stats(df_a: pd.DataFrame, df_b: pd.DataFrame) -> dict:
+    """Describe index overlap used by :func:`build_summary_delta` (same join-key rules).
+
+    Rows are keyed by ``id``, plus ``perception_label`` when both summaries carry that column.
+    Returns ``{"valid": False, "error": ..., "key_cols": ...}`` when a summary is ``None`` or lacks
+    ``id``; otherwise row counts, matched / one-sided key counts, and up to five sample keys per side.
+    """
+    if df_a is None or df_b is None:
+        return {"valid": False, "error": "Summary dataframe missing.", "key_cols": []}
+    if "id" not in df_a.columns or "id" not in df_b.columns:
+        return {
+            "valid": False,
+            "error": "Summary must include an `id` column for delta alignment.",
+            "key_cols": ["id"],
+        }
+    # ``id`` was verified above and ``perception_label`` is added only when both sides have it,
+    # so every key column is guaranteed to exist in both frames from here on.
+    if "perception_label" in df_a.columns and "perception_label" in df_b.columns:
+        key_cols = ["id", "perception_label"]
+    else:
+        key_cols = ["id"]
+
+    idx_a = df_a.set_index(key_cols).index
+    idx_b = df_b.set_index(key_cols).index
+    common = idx_a.intersection(idx_b)
+    only_a = idx_a.difference(idx_b)
+    only_b = idx_b.difference(idx_a)
+
+    def _sample(idx_diff: pd.Index, k: int = 5) -> list[str]:
+        if len(idx_diff) == 0:
+            return []
+        out: list[str] = []
+        for x in list(idx_diff)[:k]:
+            if isinstance(x, tuple):
+                out.append(", ".join(str(p) for p in x))
+            else:
+                out.append(str(x))
+        return out
+
+    return {
+        "valid": True,
+        "key_cols": key_cols,
+        "n_rows_baseline": int(len(df_a)),
+        "n_rows_candidate": int(len(df_b)),
+        "n_matched_keys": int(len(common)),
+        "n_only_baseline": int(len(only_a)),
+        "n_only_candidate": int(len(only_b)),
+        "sample_only_baseline": _sample(only_a),
+        "sample_only_candidate": _sample(only_b),
+        "matched_empty": len(common) == 0,
+    }
diff --git a/evaluation_dashboard_app/lib/t4_dataset_embed.py b/evaluation_dashboard_app/lib/t4_dataset_embed.py
new file mode 100644
index 0000000..c49d3bf
--- /dev/null
+++ b/evaluation_dashboard_app/lib/t4_dataset_embed.py
@@ -0,0 +1,104 @@
+"""Build embeddable T4 dataset metadata: JSON records, query strings, and ``POST /render`` bodies.
+
+Use with :mod:`lib.t4_visualizer_client` when wiring eval parquet rows or dashboards to ``t4-server``.
+"""
+
+from __future__ import annotations
+
+import json
+from typing import Any, List, Mapping, Optional, Sequence
+from urllib.parse import quote
+
+from lib.t4_visualizer_client import (
+    RenderRequest,
+    TargetObjectIn,
+    render_request_to_json_body,
+    target_object_from_gt_row,
+)
+
+
+def t4_dataset_context(
+    t4dataset_id: str,
+    scenario_name: str,
+    *,
+    frame_index: Optional[int] = None,
+    data_dir: Optional[str] = None,
+    sample_token: Optional[str] = None,
+) -> dict[str, Any]:
+    """Build a plain dict describing one dataset/scenario, e.g. for logs, sidecar JSON, or UI state.
+
+    ``frame_index`` is included (as ``int``) whenever it is not ``None``; ``data_dir`` and
+    ``sample_token`` are included only when truthy.
+    """
+    record: dict[str, Any] = {"t4dataset_id": t4dataset_id, "scenario_name": scenario_name}
+    if frame_index is not None:
+        record["frame_index"] = int(frame_index)
+    for key, value in (("data_dir", data_dir), ("sample_token", sample_token)):
+        if value:
+            record[key] = value
+    return record
+
+
+def t4_share_query_params(
+    t4dataset_id: str,
+    scenario_name: str,
+    frame_index: int = 0,
+) -> str:
+    """Return a percent-encoded ``key=value&...`` query string (no leading ``?``) for bookmarks or deep links."""
+    dataset_part = quote(str(t4dataset_id), safe="")
+    scenario_part = quote(str(scenario_name), safe="")
+    frame_part = int(frame_index)
+    # safe="" escapes "/" as well, so ids containing slashes stay inside a single parameter.
+    return f"t4dataset_id={dataset_part}&scenario_name={scenario_part}&frame_index={frame_part}"
+
+
+def t4_share_query_params_from_post_render_json(body: Mapping[str, Any]) -> str:
+    """Return ``render_json=<percent-encoded compact JSON of body>`` (no leading ``?``); same object as curl ``-d``."""
+    serialized = json.dumps(dict(body), ensure_ascii=False, separators=(",", ":"))
+    return "render_json=" + quote(serialized, safe="")
+
+
+def target_objects_from_rows(rows: Sequence[Mapping[str, Any]]) -> List[dict[str, Any]]:
+    """Map each row to a ``target_objects`` dict (see :func:`target_object_from_gt_row`)."""
+    return [target_object_from_gt_row(r) for r in rows]
+
+
+def build_render_request_embed(
+    t4dataset_id: str,
+    scenario_name: str,
+    frame_index: int,
+    *,
+    target_rows: Optional[Sequence[Mapping[str, Any]]] = None,
+    target_objects: Optional[Sequence[TargetObjectIn]] = None,
+    show_annotations: bool = True,
+    crop_cameras: bool = False,
+    crop_padding: int = 40,
+    crop_min_size: int = 300,
+    cameras: Optional[List[str]] = None,
+    version: Optional[str] = None,
+) -> dict[str, Any]:
+    """Return ``context`` plus a ``post_render_json`` body ready for ``POST /render``."""
+    to_list: List[TargetObjectIn] = []
+    if target_objects is not None:
+        to_list = list(target_objects)
+    elif target_rows is not None:
+        for r in target_rows:
+            d = target_object_from_gt_row(r)
+            to_list.append(TargetObjectIn(**d))
+    req = RenderRequest(
+        t4dataset_id=t4dataset_id,
+        scenario_name=scenario_name,
+        frame_index=int(frame_index),
+        target_objects=to_list,
+        show_annotations=show_annotations,
+        crop_cameras=crop_cameras,
+        crop_padding=crop_padding,
+        crop_min_size=crop_min_size,
+        cameras=cameras,
+        version=version,
+    )
+    body = render_request_to_json_body(req)
+    return {
+        "context": t4_dataset_context(t4dataset_id, scenario_name, frame_index=frame_index),
+        "post_render_json": body,
+    }
diff --git a/evaluation_dashboard_app/lib/t4_three_layers.py b/evaluation_dashboard_app/lib/t4_three_layers.py
new file mode 100644
index 0000000..54d7ced
--- /dev/null
+++ b/evaluation_dashboard_app/lib/t4_three_layers.py
@@ -0,0 +1,260 @@
+"""T4 `/viewer/three` embed: GT / pred / matched 3D box layers via postMessage."""
+
+from __future__ import annotations
+
+import html
+import json
+import math
+from urllib.parse import urlencode
+from typing import TYPE_CHECKING
+
+import streamlit.components.v1 as components
+
+if TYPE_CHECKING:
+    import pandas as pd
+
+
+_OPTIONAL_NUMERIC_FIELDS = (
+    "vx",
+    "vy",
+    "confidence",
+    "pointcloud_num",
+    "x_error",
+    "y_error",
+    "z_error",
+    "yaw_error",
+    "vx_error",
+    "vy_error",
+    "speed_error",
+    "center_distance",
+    "plane_distance",
+    "pair_dt_sec",
+    "dx_min",
+    "dy_min",
+    "unix_time",
+    "frame_index",
+)
+
+_OPTIONAL_TEXT_FIELDS = (
+    "frame_id",
+    "shape_type",
+    "visibility",
+    "pair_uuid",
+    "topic_name",
+    "t4dataset_id",
+    "suite_name",
+    "t4dataset_name",
+    "scenario_name",
+    "run",
+    "source",
+)
+
+_VEHICLE_LABELS = {"car", "truck", "bus", "trailer"}
+_LEGACY_EXTERNAL_BBOX_YAW_OFFSET = math.pi / 2
+
+
+def _is_missing(value: object) -> bool:
+    """Return True for ``None``, NaN-like values (``value != value``), and NA scalars that refuse ``bool()``."""
+    try:
+        return value is None or bool(value != value)
+    except TypeError:  # e.g. pandas.NA: the comparison yields NA, whose truth value is ambiguous
+        return True
+
+
+def resolve_t4_dataset_id(dff: "pd.DataFrame") -> str:
+    """First non-null **t4dataset_id**, else first non-null **t4dataset_name**, as ``str`` (empty if neither)."""
+    if dff is not None and not dff.empty:
+        for column in ("t4dataset_id", "t4dataset_name"):
+            if column in dff.columns:
+                present = dff[column].dropna()
+                if not present.empty:
+                    return str(present.iloc[0])
+    return ""
+
+
+def resolve_t4_scenario(dff: "pd.DataFrame", scenario_from_sidebar: str | None) -> str:
+    if scenario_from_sidebar is not None and str(scenario_from_sidebar).strip() != "":
+        return str(scenario_from_sidebar)
+    if dff is not None and not dff.empty and "scenario_name" in dff.columns and dff["scenario_name"].notna().any():
+        return str(dff["scenario_name"].dropna().iloc[0])
+    return ""
+
+
+def infer_external_bbox_alignment_query_params(df: "pd.DataFrame") -> str:
+    """Return `/viewer/three` query params for eval bbox dimension/yaw convention.
+
+    Older eval parquet exports often store vehicle dimensions as width-forward
+    (`length < width`) and rely on the T4 viewer's legacy `+pi/2` external bbox
+    yaw offset. Newer app/analyzer exports store body-x as `length` and body-y as
+    `width`; those must pass `external_bbox_yaw_offset=0` or the viewer rotates
+    them by 90 degrees.
+    """
+    if df is None or df.empty or not {"length", "width"}.issubset(df.columns):
+        yaw_offset = _LEGACY_EXTERNAL_BBOX_YAW_OFFSET
+    else:
+        sample = df
+        if "label" in sample.columns:
+            labels = sample["label"].astype(str).str.lower()
+            vehicle_sample = sample[labels.isin(_VEHICLE_LABELS)]
+            if not vehicle_sample.empty:
+                sample = vehicle_sample
+        if "source" in sample.columns:
+            gt_sample = sample[sample["source"].astype(str) == "GT"]
+            if not gt_sample.empty:
+                sample = gt_sample
+
+        dims = sample[["length", "width"]].apply(lambda s: s.astype(float), axis=0)
+        dims = dims[(dims["length"] > 0) & (dims["width"] > 0)]
+        if dims.empty:
+            yaw_offset = _LEGACY_EXTERNAL_BBOX_YAW_OFFSET
+        else:
+            length_forward_ratio = float((dims["length"] >= dims["width"]).mean())
+            yaw_offset = 0.0 if length_forward_ratio >= 0.8 else _LEGACY_EXTERNAL_BBOX_YAW_OFFSET
+
+    return urlencode(
+        {
+            "external_bbox_yaw_offset": f"{yaw_offset:.12g}",
+            "external_bbox_swap_lw": "false",
+        }
+    )
+
+
+def _single_frame_layer_dict(df_frame: "pd.DataFrame") -> dict:
+    """Per-frame gt / pred / matched_pairs (no ``type`` field); used by single- and all-frame payloads.
+
+    Missing cells (None/NaN) in the required box fields fall back to their defaults instead of leaking
+    ``NaN`` / ``"nan"`` into the payload (bare ``NaN`` is not valid JSON for the viewer-side
+    ``JSON.parse``). A frame without a ``source`` column yields empty layers.
+    """
+    if df_frame is None or df_frame.empty or "source" not in df_frame.columns:
+        return {"gt": [], "pred": [], "matched_pairs": []}
+
+    def _num(row: "pd.Series", key: str, default: float = 0.0) -> float:
+        # Falsy cells (0, "") keep the historical ``value or default`` fallback.
+        value = row.get(key)
+        return default if _is_missing(value) else float(value or default)
+
+    def _text(row: "pd.Series", key: str) -> str:
+        value = row.get(key)
+        return "" if _is_missing(value) else str(value or "")
+
+    def _row_to_box(row: "pd.Series") -> dict:
+        box: dict = {name: _num(row, name) for name in ("x", "y", "z", "width", "length")}
+        box.update(height=_num(row, "height", 1.5), yaw=_num(row, "yaw"))
+        box.update({name: _text(row, name) for name in ("label", "uuid", "status")})
+        # Optional columns are copied only when present and non-missing.
+        for fields, cast in ((_OPTIONAL_NUMERIC_FIELDS, float), (_OPTIONAL_TEXT_FIELDS, str)):
+            for field in fields:
+                if field in row.index and not _is_missing(row.get(field)):
+                    box[field] = cast(row.get(field))
+        return box
+
+    gt_df = df_frame[df_frame["source"] == "GT"].copy()
+    pred_df = df_frame[df_frame["source"] == "EST"].copy()
+    gt_boxes = [_row_to_box(r) for _, r in gt_df.iterrows()]
+    pred_boxes = [_row_to_box(r) for _, r in pred_df.iterrows()]
+
+    gt_tp_idx: dict[str, int] = {}
+    for i, b in enumerate(gt_boxes):
+        match_key = str(b.get("pair_uuid") or b.get("uuid") or "")
+        if b["status"] == "TP" and match_key:
+            gt_tp_idx.setdefault(match_key, i)
+    pred_tp_idx: dict[str, int] = {}
+    for i, b in enumerate(pred_boxes):
+        match_key = str(b.get("pair_uuid") or b.get("uuid") or "")
+        if b["status"] == "TP" and match_key:
+            pred_tp_idx.setdefault(match_key, i)
+    matched_pairs = []
+    for match_key, gi in gt_tp_idx.items():
+        pi = pred_tp_idx.get(match_key)
+        if pi is not None:
+            matched_pairs.append({"gt_idx": int(gi), "pred_idx": int(pi), "pair_uuid": match_key})
+
+    return {
+        "gt": gt_boxes,
+        "pred": pred_boxes,
+        "matched_pairs": matched_pairs,
+    }
+
+
+def build_three_layer_payload(df_frame: "pd.DataFrame") -> dict:
+    """Build the single-frame GT/Pred/Matched overlay message for the `/viewer/three` iframe.
+
+    A ``None`` or empty frame produces a ``bbox_layers_clear`` message instead of ``bbox_layers``.
+    """
+    if df_frame is None or df_frame.empty:
+        return {"type": "bbox_layers_clear"}
+    layers = _single_frame_layer_dict(df_frame)
+    message: dict = {"type": "bbox_layers"}
+    message.update((name, layers[name]) for name in ("gt", "pred", "matched_pairs"))
+    return message
+
+
+def build_three_layer_payload_all_frames(df: "pd.DataFrame") -> dict:
+    """Group *df* by ``frame_index`` and emit one gt/pred/matched layer dict per frame.
+
+    Keys are the integer frame index as a string; groups whose key cannot be converted to ``int`` are skipped.
+    """
+    per_frame: dict[str, dict] = {}
+    if df is not None and not df.empty and "frame_index" in df.columns:
+        for raw_index, rows in df.groupby("frame_index", sort=True):
+            try:
+                frame_key = str(int(raw_index))
+            except (TypeError, ValueError):
+                continue
+            per_frame[frame_key] = _single_frame_layer_dict(rows)
+    return {"type": "bbox_layers_by_frame", "frames": per_frame}
+
+
+def render_t4_three_js_embed(viewer_three_url: str, layer_payload: dict, height: int = 700) -> None:
+    """Iframe to T4 three viewer + postMessage with bbox layer payload (GT, pred, matched pairs)."""
+    _payload_json = json.dumps(layer_payload, ensure_ascii=True)
+    _payload_b64 = _payload_json.encode("utf-8").hex()
+    _iframe_src = html.escape(viewer_three_url, quote=True)
+    components.html(
+        (
+            f'<iframe id="t4-three-viewer" src="{_iframe_src}" '
+            f'width="100%" height="{height}" style="border:none;border-radius:8px;background:#e2e8f0" '
+            f'allowfullscreen allow="fullscreen *" '
+            f'loading="lazy" title="T4 three viewer" referrerpolicy="no-referrer-when-downgrade"></iframe>'
+            "<script>"
+            "(()=>{"
+            "const iframe=document.getElementById('t4-three-viewer');"
+            f"const payloadHex='{_payload_b64}';"
+            "const hexToUtf8=(hex)=>{"
+            "if(!hex||hex.length%2!==0)return '';"
+            "const bytes=new Uint8Array(hex.length/2);"
+            "for(let i=0;i<hex.length;i+=2){bytes[i/2]=parseInt(hex.slice(i,i+2),16)||0;}"
+            "return new TextDecoder().decode(bytes);"
+            "};"
+            "let payload={type:'bbox_layers_clear'};"
+            "try{"
+            "const payloadJson=hexToUtf8(payloadHex);"
+            "payload=JSON.parse(payloadJson);"
+            "const fc=payload.frames&&typeof payload.frames==='object'?Object.keys(payload.frames).length:0;"
+            "console.info('[bbox-debug] payload prepared', {type:payload.type,gt:(payload.gt||[]).length,pred:(payload.pred||[]).length,matched:(payload.matched_pairs||[]).length,frames:fc});"
+            "}catch(err){"
+            "console.error('[bbox-debug] payload parse failed', err);"
+            "}"
+            "let postCount=0;"
+            "const post=(reason)=>{"
+            "if(!iframe||!iframe.contentWindow)return;"
+            "let targetOrigin='*';"
+            "try{ targetOrigin = new URL(iframe.src, window.location.href).origin || '*'; }catch(_){ targetOrigin='*'; }"
+            "postCount+=1;"
+            "iframe.contentWindow.postMessage(payload,targetOrigin);"
+            "console.info('[bbox-debug] postMessage sent', {reason,postCount,targetOrigin,payloadType:payload.type});"
+            "};"
+            "iframe.addEventListener('load',()=>{"
+            "post('iframe-load');"
+            "let n=0;"
+            "const t=setInterval(()=>{post('retry');n+=1;if(n>12)clearInterval(t);},250);"
+            "});"
+            "setTimeout(()=>post('initial-delay-300ms'),300);"
+            "setTimeout(()=>post('initial-delay-1200ms'),1200);"
+            "})();"
+            "</script>"
+        ),
+        height=height + 24,
+        scrolling=True,
+    )
diff --git a/evaluation_dashboard_app/lib/t4_visualizer_client.py b/evaluation_dashboard_app/lib/t4_visualizer_client.py
new file mode 100644
index 0000000..85a96e7
--- /dev/null
+++ b/evaluation_dashboard_app/lib/t4_visualizer_client.py
@@ -0,0 +1,320 @@
+"""HTTP client for the T4 Visualizer FastAPI server (render_frame over HTTP).
+
+Default base URL: ``T4_VISUALIZER_BASE_URL`` environment variable, or the module constant ``DEFAULT_BASE_URL``.
+
+Does not import t4_devkit or t4_visualizer; only uses ``requests`` against the server's
+``GET /health``, ``GET /server/structure.json``, ``GET /datasets``, ``GET /datasets/{id}/availability``,
+``GET /datasets/{id}/scenarios``, and ``POST /render`` endpoints.
+"""
+
+from __future__ import annotations
+
+import base64
+import os
+from dataclasses import asdict, dataclass, field
+from typing import Any, List, Mapping, Optional, Tuple
+
+import requests
+
+DEFAULT_BASE_URL = "http://10.0.6.148:8000"
+ENV_BASE_URL = "T4_VISUALIZER_BASE_URL"
+
+
+class T4VisualizerError(Exception):
+    """Raised when the T4 visualizer HTTP API returns an error or invalid response."""
+
+    def __init__(
+        self,
+        message: str,
+        *,
+        status_code: Optional[int] = None,
+        response_text: str = "",
+    ) -> None:
+        super().__init__(message)
+        self.status_code = status_code
+        self.response_text = response_text
+
+
+@dataclass
+class TargetObjectIn:
+    """One object to draw on the render (matches server ``TargetObjectIn``)."""
+
+    uuid: str = ""
+    x: float = 0.0
+    y: float = 0.0
+    z: float = 0.0
+    label: str = ""
+    width: float = 0.0
+    length: float = 0.0
+    height: float = 0.0
+    yaw: float = 0.0
+
+
+@dataclass
+class RenderRequest:
+    """Request body for ``POST /render`` (matches server ``RenderRequest``)."""
+
+    t4dataset_id: str
+    scenario_name: str
+    frame_index: int
+    target_objects: List[TargetObjectIn] = field(default_factory=list)
+    cameras: Optional[List[str]] = None
+    show_annotations: bool = True
+    version: Optional[str] = None
+    crop_cameras: bool = False
+    crop_padding: int = 40
+    crop_min_size: int = 300
+
+
+@dataclass
+class ImageResult:
+    """One rendered PNG in the response."""
+
+    label: str
+    png_base64: str
+
+
+@dataclass
+class RenderResult:
+    """Parsed ``POST /render`` JSON response."""
+
+    sample_token: str
+    timestamp_us: int
+    images: List[ImageResult]
+    raw_json: Optional[dict] = None
+    # Optional server-reported timings (newer t4-server JSON body)
+    elapsed_ms: Optional[float] = None
+    tier4_load_ms: Optional[float] = None
+    render_ms: Optional[float] = None
+
+    def decode_png(self, label: str) -> bytes:
+        """Decode base64 PNG bytes for the image with the given label."""
+        for img in self.images:
+            if img.label == label:
+                return base64.b64decode(img.png_base64)
+        raise KeyError(f"No image with label {label!r}")
+
+    def decode_all_images(self) -> List[Tuple[str, bytes]]:
+        """Decode all images to ``(label, png_bytes)``."""
+        return [(img.label, base64.b64decode(img.png_base64)) for img in self.images]
+
+
+def render_response_json_for_debug(
+    data: Mapping[str, Any], *, max_b64_preview: int = 120
+) -> dict[str, Any]:
+    """Return a shallow copy of a ``POST /render`` response whose long ``png_base64`` strings are shortened.
+
+    Shortened entries keep the first ``max_b64_preview`` characters plus ``…`` and gain ``png_base64_len``.
+    """
+    def _shorten(entry: Any) -> Any:
+        if not isinstance(entry, dict):
+            return entry
+        clipped = dict(entry)
+        encoded = clipped.get("png_base64")
+        if isinstance(encoded, str) and len(encoded) > max_b64_preview:
+            clipped["png_base64"] = f"{encoded[:max_b64_preview]}…"
+            clipped["png_base64_len"] = len(encoded)
+        return clipped
+
+    preview: dict[str, Any] = dict(data)
+    if isinstance(preview.get("images"), list):
+        preview["images"] = [_shorten(entry) for entry in preview["images"]]
+    return preview
+
+
+def _default_base_url() -> str:
+    return os.environ.get(ENV_BASE_URL, DEFAULT_BASE_URL).rstrip("/")
+
+
+def _serialize_target_object(o: TargetObjectIn) -> dict:
+    d = asdict(o)
+    return d
+
+
+def render_request_to_json_body(req: RenderRequest) -> dict:
+    """Build a JSON-serializable dict for ``POST /render``."""
+    out: dict = {
+        "t4dataset_id": req.t4dataset_id,
+        "scenario_name": req.scenario_name,
+        "frame_index": req.frame_index,
+        "target_objects": [_serialize_target_object(o) for o in req.target_objects],
+        "show_annotations": req.show_annotations,
+        "crop_cameras": req.crop_cameras,
+        "crop_padding": req.crop_padding,
+        "crop_min_size": req.crop_min_size,
+    }
+    if req.cameras is not None:
+        out["cameras"] = req.cameras
+    if req.version is not None:
+        out["version"] = req.version
+    return out
+
+
+def target_object_from_gt_row(row: Mapping[str, Any]) -> dict:
+    """Map a GT / eval parquet row to one ``target_objects`` entry for ``RenderRequest``.
+
+    Uses ``uuid`` or ``gt_uuid`` for the instance id; position from ``x``, ``y``, ``z``;
+    numeric fields default to ``0.0`` when missing, where missing means ``None`` or NaN.
+    """
+    raw_id = row.get("uuid")
+    if raw_id is None or raw_id == "" or raw_id != raw_id:  # ``x != x`` is True only for NaN
+        raw_id = row.get("gt_uuid")
+    uuid_str = "" if raw_id is None or raw_id != raw_id else str(raw_id)
+
+    def _float(key: str, default: float = 0.0) -> float:
+        v = row.get(key)
+        # NaN is how pandas marks a missing cell and is not valid JSON, so it also maps to the default.
+        number = default if v is None else float(v)
+        return default if number != number else number
+
+    return {
+        "uuid": uuid_str,
+        "x": _float("x"),
+        "y": _float("y"),
+        "z": _float("z"),
+        "label": str(row.get("label") or ""),
+        "width": _float("width"),
+        "length": _float("length"),
+        "height": _float("height"),
+        "yaw": _float("yaw"),
+    }
+
+
+class T4VisualizerClient:
+    """Thin HTTP client for the T4 Visualizer server; every non-2xx or non-JSON reply raises ``T4VisualizerError``."""
+
+    def __init__(
+        self,
+        base_url: Optional[str] = None,
+        *,
+        timeout: float = 120.0,
+        session: Optional[requests.Session] = None,
+    ) -> None:
+        raw = base_url if base_url is not None else _default_base_url()
+        self.base_url = raw.rstrip("/")
+        self.timeout = timeout
+        self._session = session if session is not None else requests.Session()
+
+    def _url(self, path: str) -> str:
+        if not path.startswith("/"):
+            path = "/" + path
+        return f"{self.base_url}{path}"
+
+    def _raise_for_status(self, resp: requests.Response) -> None:
+        if resp.ok:
+            return
+        text = (resp.text or "")[:2000]
+        raise T4VisualizerError(
+            f"T4 visualizer HTTP {resp.status_code}: {text[:500]}",
+            status_code=resp.status_code,
+            response_text=text,
+        )
+
+    def health(self) -> dict:
+        """GET /health — status, ``service``, ``version``, ``data_dir_exists``, structure paths (newer servers)."""
+        resp = self._session.get(self._url("/health"), timeout=self.timeout)
+        self._raise_for_status(resp)
+        try:
+            return resp.json()
+        except ValueError as exc:
+            raise T4VisualizerError("Invalid JSON from /health") from exc
+
+    def server_structure_json(self) -> dict:
+        """GET /server/structure.json — Mermaid source for the server internals plus cache/runtime meta."""
+        to = min(30.0, float(self.timeout))
+        resp = self._session.get(self._url("/server/structure.json"), timeout=to)
+        self._raise_for_status(resp)
+        try:
+            return resp.json()
+        except ValueError as exc:
+            raise T4VisualizerError("Invalid JSON from /server/structure.json") from exc
+
+    def list_datasets(self) -> dict:
+        """GET /datasets — returns at least ``data_dir`` and ``datasets``."""
+        resp = self._session.get(self._url("/datasets"), timeout=self.timeout)
+        self._raise_for_status(resp)
+        try:
+            return resp.json()
+        except ValueError as exc:
+            raise T4VisualizerError("Invalid JSON from /datasets") from exc
+
+    def list_dataset_scenarios(
+        self, t4dataset_id: str, version: Optional[str] = None
+    ) -> dict:
+        """GET /datasets/{t4dataset_id}/scenarios — scene names and ``nbr_samples`` (frame counts).
+
+        Response keys typically include ``t4dataset_id``, ``scenarios`` (list of dicts with
+        ``name``, ``token``, ``description``, ``nbr_samples``), and optional ``version``.
+        """
+        from urllib.parse import quote
+
+        tid = quote(str(t4dataset_id), safe="")
+        params = {"version": version} if version is not None else None
+        resp = self._session.get(
+            self._url(f"/datasets/{tid}/scenarios"),
+            params=params,
+            timeout=self.timeout,
+        )
+        self._raise_for_status(resp)
+        try:
+            return resp.json()
+        except ValueError as exc:
+            raise T4VisualizerError("Invalid JSON from /datasets/.../scenarios") from exc
+
+    def dataset_availability(self, t4dataset_id: str) -> dict:
+        """GET /datasets/{t4dataset_id}/availability — whether the dataset is on disk for this server.
+
+        Typical JSON: ``t4dataset_id``, ``available`` (bool), ``dataset_path`` (str or null).
+        """
+        from urllib.parse import quote
+
+        tid = quote(str(t4dataset_id), safe="")
+        resp = self._session.get(
+            self._url(f"/datasets/{tid}/availability"),
+            timeout=self.timeout,
+        )
+        self._raise_for_status(resp)
+        try:
+            return resp.json()
+        except ValueError as exc:
+            raise T4VisualizerError("Invalid JSON from /datasets/.../availability") from exc
+
+    def render(self, payload: RenderRequest) -> RenderResult:
+        """POST /render with a :class:`RenderRequest`; shape errors report a base64-clipped preview of the body."""
+        body = render_request_to_json_body(payload)
+        resp = self._session.post(
+            self._url("/render"),
+            json=body,
+            timeout=self.timeout,
+        )
+        self._raise_for_status(resp)
+        try:
+            data = resp.json()
+        except ValueError as exc:
+            raise T4VisualizerError("Invalid JSON from /render") from exc
+
+        try:
+            images_raw = data["images"]
+            imgs = [
+                ImageResult(label=str(x["label"]), png_base64=str(x["png_base64"]))
+                for x in images_raw
+            ]
+
+            def _opt_float(key: str) -> Optional[float]:
+                v = data.get(key)
+                if v is None:
+                    return None
+                return float(v)
+
+            return RenderResult(
+                sample_token=str(data["sample_token"]),
+                timestamp_us=int(data["timestamp_us"]),
+                images=imgs,
+                raw_json=dict(data),
+                elapsed_ms=_opt_float("elapsed_ms"),
+                tier4_load_ms=_opt_float("tier4_load_ms"),
+                render_ms=_opt_float("render_ms"),
+            )
+        except (KeyError, TypeError, ValueError) as exc:
+            preview = render_response_json_for_debug(data) if isinstance(data, dict) else data
+            raise T4VisualizerError(f"Unexpected /render response shape: {preview!r}") from exc
diff --git a/evaluation_dashboard_app/lib/tlr_eval_analyzer.py b/evaluation_dashboard_app/lib/tlr_eval_analyzer.py
index 437e8df..ac88053 100644
--- a/evaluation_dashboard_app/lib/tlr_eval_analyzer.py
+++ b/evaluation_dashboard_app/lib/tlr_eval_analyzer.py
@@ -14,6 +14,11 @@
 import pandas as pd
 import numpy as np
 
+try:
+    import yaml
+except ImportError:  # pragma: no cover - optional dependency fallback
+    yaml = None
+
 
 def _obj_to_dict(obj: Any) -> Any:
     """Recursively convert an object to dict/list primitives for TLR frame structure."""
@@ -34,6 +39,8 @@ class TLREvaluationAnalyzer:
     def __init__(self, result_directory: str):
         self.result_directory = result_directory
         self.scenario_results: Dict[str, List[Dict]] = {}
+        self.scenario_paths: Dict[str, Path] = {}
+        self.scenario_metadata: Dict[str, Dict[str, str]] = {}
         self.criteria_data: Dict[str, Dict] = {}
         self.cached_vehicle_statuses: Dict[str, List[Dict]] = {}
         self.cached_traffic_light_data: Dict[str, List[Dict]] = {}
@@ -58,18 +65,21 @@ def load_all_results_from_pkl(self) -> None:
             frames = self._load_pkl_scenario(child)
             if frames:
                 self.scenario_results[child.name] = frames
+                self.scenario_paths[child.name] = child
         # Also support flat layout: root contains *.pkl.z (e.g. archive) — each file = one scenario
         if not self.scenario_results:
             for pkl_path in root.glob("*.pkl.z"):
                 frames = self._load_single_pkl_file(pkl_path)
                 if frames:
                     self.scenario_results[pkl_path.stem] = frames
+                    self.scenario_paths[pkl_path.stem] = pkl_path.parent
             for pkl_path in root.glob("*.pkl"):
                 if pkl_path.name == "scene_result.pkl":
                     continue  # already handled as child/scene_result.pkl
                 frames = self._load_single_pkl_file(pkl_path)
                 if frames:
                     self.scenario_results[pkl_path.stem] = frames
+                    self.scenario_paths[pkl_path.stem] = pkl_path.parent
 
     def _load_pkl_scenario(self, scenario_path: Path) -> List[Dict]:
         """Load one scenario dir: scene_result.pkl or first .pkl.z in that dir."""
@@ -160,6 +170,7 @@ def load_all_results_from_json(self) -> None:
             if result_file.exists():
                 # Flat: direct child has result.json
                 self.scenario_results[child.name] = self._load_result_jsonl(os.fspath(result_file))
+                self.scenario_paths[child.name] = child
             else:
                 # Suite: child is a suite folder; look for testcase subdirs with result.json
                 for testcase_dir in child.iterdir():
@@ -169,6 +180,7 @@ def load_all_results_from_json(self) -> None:
                     if tc_result.exists():
                         scenario_key = f"{child.name}/{testcase_dir.name}"
                         self.scenario_results[scenario_key] = self._load_result_jsonl(os.fspath(tc_result))
+                        self.scenario_paths[scenario_key] = testcase_dir
 
     def _load_result_jsonl(self, file_path: str) -> List[Dict]:
         """Load and parse result.json (JSONL format)."""
@@ -664,12 +676,51 @@ def _matches_criteria_range_critical_priority(self, criteria_range: str, tlr_typ
             return True
         return False
 
+    def _get_scenario_metadata(self, scenario_name: str) -> Dict[str, str]:
+        """Return memoized scenario metadata (currently ``t4dataset_id``) read from scenario.yaml."""
+        if scenario_name in self.scenario_metadata:
+            return self.scenario_metadata[scenario_name]
+
+        metadata: Dict[str, str] = {"t4dataset_id": ""}
+        scenario_path = self.scenario_paths.get(scenario_name)
+        scenario_yaml_path = None
+        if scenario_path:
+            direct_yaml_path = scenario_path / "scenario.yaml"
+            if direct_yaml_path.is_file():
+                scenario_yaml_path = direct_yaml_path
+            else:
+                # Some TLR layouts store result.json in a suffixed sibling dir
+                # (for example ``ScenarioName_4038db04``) while ``scenario.yaml``
+                # lives in the unsuffixed directory next to it.
+                base_name = scenario_path.name
+                if re.fullmatch(r".*_[0-9a-fA-F]{8}", base_name):
+                    sibling_yaml_path = scenario_path.parent / base_name.rsplit("_", 1)[0] / "scenario.yaml"
+                    if sibling_yaml_path.is_file():
+                        scenario_yaml_path = sibling_yaml_path
+        if scenario_yaml_path and yaml is not None:
+            try:
+                with open(scenario_yaml_path, "r", encoding="utf-8") as f:
+                    scenario_doc = yaml.safe_load(f)
+                # Guard each level: a non-mapping document or a null ``Evaluation`` must not raise.
+                evaluation = scenario_doc.get("Evaluation") if isinstance(scenario_doc, dict) else None
+                datasets = evaluation.get("Datasets") if isinstance(evaluation, dict) else None
+                if isinstance(datasets, list):
+                    dataset_ids = [str(k) for item in datasets if isinstance(item, dict) for k in item]
+                    metadata["t4dataset_id"] = ", ".join(dataset_ids)
+            except (OSError, yaml.YAMLError):
+                # Best effort: an unreadable or malformed file leaves the id empty.
+                pass
+
+        self.scenario_metadata[scenario_name] = metadata
+        return metadata
+
     def get_vehicle_status_details_df(self) -> pd.DataFrame | None:
         """Return a DataFrame of per-frame vehicle status and TLR info for all scenarios."""
         all_status_data = []
         for scenario_name, results in self.scenario_results.items():
             if not results:
                 continue
+            scenario_metadata = self._get_scenario_metadata(scenario_name)
             vehicle_statuses = self.cached_vehicle_statuses.get(scenario_name)
             traffic_light_data = self.cached_traffic_light_data.get(scenario_name)
             if not vehicle_statuses or not traffic_light_data:
@@ -678,6 +729,7 @@ def get_vehicle_status_details_df(self) -> pd.DataFrame | None:
             for i, (frame_status_info, tlr_info) in enumerate(zip(vehicle_statuses, traffic_light_data)):
                 all_status_data.append({
                     "scenario": scenario_name,
+                    "t4dataset_id": scenario_metadata.get("t4dataset_id", ""),
                     "frame_index": i,
                     "frame_name": tlr_info.get("frame", ""),
                     "status": frame_status_info["status"],
diff --git a/evaluation_dashboard_app/lib/ui/__init__.py b/evaluation_dashboard_app/lib/ui/__init__.py
index 6bae170..762606b 100644
--- a/evaluation_dashboard_app/lib/ui/__init__.py
+++ b/evaluation_dashboard_app/lib/ui/__init__.py
@@ -35,6 +35,8 @@
 )
 from lib.ui.styles_download import inject_download_page_styles
 from lib.ui.styles_global import inject_app_page_styles
+from lib.ui.task_history import get_task_list_current_user, render_task_detail_content, render_task_list
+from lib.ui.task_result_summary import render_summary_table, render_task_result_summary
 
 __all__ = [
     "ImpressiveProgressHUD",
@@ -62,6 +64,11 @@
     "render_job_json_summary_panel",
     "render_recent_scenario_downloads_intro",
     "render_scenario_download_summary_panel",
+    "get_task_list_current_user",
+    "render_summary_table",
+    "render_task_detail_content",
+    "render_task_list",
+    "render_task_result_summary",
     "render_kpi_card",
     "section_header_html",
 ]
diff --git a/evaluation_dashboard_app/lib/ui/detection_stats.py b/evaluation_dashboard_app/lib/ui/detection_stats.py
index 780d245..b4d0f1d 100644
--- a/evaluation_dashboard_app/lib/ui/detection_stats.py
+++ b/evaluation_dashboard_app/lib/ui/detection_stats.py
@@ -2,7 +2,6 @@
 
 from __future__ import annotations
 
-import html
 from contextlib import contextmanager
 
 import streamlit as st
@@ -272,25 +271,15 @@ def section_header_html(title: str, caption: str = "") -> str:
     return f'<div class="section-header">{title}</div>'
 
 
-def ds_spot_loading_markup(label: str) -> str:
-    """Compact inline HTML: shows where the app is busy (Streamlit runs top-to-bottom, so this “moves” down the page)."""
-    safe = html.escape(label)
-    return f"""<div class="ds-spot-loader" role="status" aria-live="polite">
-  <span class="ds-spot-ping" aria-hidden="true"></span>
-  <span class="ds-spot-working">Working here</span>
-  <span class="ds-spot-label">{safe}</span>
-  <span class="ds-spot-bar"><span class="ds-spot-bar-inner"></span></span>
-</div>"""
+def ds_spot_loading_markup(_label: str) -> str:
+    """Spot loader HTML disabled (was: “Working here” + label); returns empty string."""
+    return ""
 
 
 @contextmanager
-def ds_spot_loading(label: str):
-    slot = st.empty()
-    slot.markdown(ds_spot_loading_markup(label), unsafe_allow_html=True)
-    try:
-        yield
-    finally:
-        slot.empty()
+def ds_spot_loading(_label: str):
+    """Spot loader context manager disabled (no-op); kept for call-site compatibility."""
+    yield
 
 def detection_stats_page_loading_banner_markup() -> str:
     """Top-of-page banner while queries and charts stream in."""
diff --git a/evaluation_dashboard_app/lib/ui/download_ui.py b/evaluation_dashboard_app/lib/ui/download_ui.py
index 100d89f..28c6c23 100644
--- a/evaluation_dashboard_app/lib/ui/download_ui.py
+++ b/evaluation_dashboard_app/lib/ui/download_ui.py
@@ -127,15 +127,15 @@ def render_download_hero(*, queue_enabled: bool) -> None:
     )
 
 
-def render_download_task_section_header(*, since_days: int = 7, max_rows: int = 200) -> None:
+def render_download_task_section_header(*, since_days: Optional[int] = 7, max_rows: int = 200) -> None:
     """Lightweight title for the worker task list (no extra card chrome — task rows are the cards)."""
-    days = int(since_days)
     cap = int(max_rows)
     st.subheader("Recent tasks")
-    st.caption(
-        f"Queued/running jobs below; completed or failed in **Task history**. "
-        f"Last **{days}** days, up to **{cap}** rows."
-    )
+    if since_days is None:
+        window = "All time"
+    else:
+        window = f"Last **{int(since_days)}** days"
+    st.caption(f"Queued/running jobs below; completed or failed in **Task history**. {window}, up to **{cap}** rows.")
 
 
 def _coerce_progress_fraction(progress_pct: Optional[Any]) -> Optional[float]:
diff --git a/evaluation_dashboard_app/lib/ui/recent_evaluator_jobs.py b/evaluation_dashboard_app/lib/ui/recent_evaluator_jobs.py
new file mode 100644
index 0000000..933f040
--- /dev/null
+++ b/evaluation_dashboard_app/lib/ui/recent_evaluator_jobs.py
@@ -0,0 +1,2087 @@
+"""Shared Recent Evaluator Jobs UI."""
+
+from __future__ import annotations
+
+import html
+import os
+import urllib.parse
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from datetime import datetime, timedelta, timezone
+from pathlib import Path
+from typing import Any, Callable, Dict, List, Optional
+
+import pandas as pd
+import requests
+import streamlit as st
+
+from lib import evaluator_api
+from lib.path_utils import resolve_under_data_root, to_data_relative
+
+_JST = timezone(timedelta(hours=9))  # fixed UTC+09:00 offset used when formatting times
+_CONFIG_GETTER: Callable[[str, Any], Any] = lambda key, default=None: default  # placeholder until configure_recent_evaluator_jobs_ui() runs
+_CONFIG_SETTER: Callable[[str, Any], None] = lambda key, value: None  # placeholder: discards writes
+_ENQUEUE_TASK: Callable[[str, Dict[str, Any]], Optional[str]] = lambda task_type, params: None  # placeholder: enqueues nothing
+CATALOG_IO_AVAILABLE = False  # overwritten by configure_recent_evaluator_jobs_ui()
+ENVIRONMENT = "default"  # overwritten by configure_recent_evaluator_jobs_ui()
+_DEFAULT_EVAL_WORKERS = 4  # used when EVAL_WORKERS_DEFAULT is unset or not an integer
+
+
+def _default_eval_workers() -> int:  # worker count from EVAL_WORKERS_DEFAULT, clamped to 1..16
+    try:
+        workers = int(os.environ.get("EVAL_WORKERS_DEFAULT", _DEFAULT_EVAL_WORKERS))
+    except (TypeError, ValueError):
+        workers = _DEFAULT_EVAL_WORKERS  # value is not an integer: use the built-in default
+    return max(1, min(workers, 16))
+
+
+def configure_recent_evaluator_jobs_ui(*, get_config_value: Callable[[str, Any], Any], set_config_value: Callable[[str, Any], None], enqueue_task: Callable[[str, Dict[str, Any]], Optional[str]], catalog_io_available: bool, environment: str = "default") -> None:
+    """Install the config accessors, task enqueuer and flags that this module's helpers delegate to."""
+    global _CONFIG_GETTER, _CONFIG_SETTER, _ENQUEUE_TASK, CATALOG_IO_AVAILABLE, ENVIRONMENT
+    # Inside this function the parameter names shadow the module-level wrappers of the same name.
+    _CONFIG_GETTER, _CONFIG_SETTER, _ENQUEUE_TASK = get_config_value, set_config_value, enqueue_task
+    CATALOG_IO_AVAILABLE = bool(catalog_io_available)
+    ENVIRONMENT = environment or "default"
+
+
+def get_config_value(key: str, default: Any = None) -> Any:  # delegates to the currently installed _CONFIG_GETTER
+    return _CONFIG_GETTER(key, default)
+
+
+def set_config_value(key: str, value: Any) -> None:  # delegates to the currently installed _CONFIG_SETTER
+    _CONFIG_SETTER(key, value)
+
+
+def _enqueue_task(task_type: str, params: Dict[str, Any]) -> Optional[str]:  # delegates to the installed _ENQUEUE_TASK
+    return _ENQUEUE_TASK(task_type, params)
+
+
+def _friendly_request_error_message(exc: Exception) -> str:
+    """Translate a low-level request failure into a short, user-facing sentence."""
+    detail = str(exc or "").strip().lower()
+    # Ordered rules: the first group with any fragment found in the error text wins.
+    rules = (
+        (("temporary failure in name resolution", "failed to resolve", "name resolution"), "Could not load evaluator jobs because the network appears to be unavailable."),
+        (("auth.web.auto", "/token"), "Could not load evaluator jobs because the sign-in service is currently unavailable."),
+        (("connection refused", "max retries exceeded", "newconnectionerror"), "Could not connect to the evaluator service right now. Please try again in a moment."),
+        (("timed out", "timeout"), "Loading evaluator jobs took too long. Please try again."),
+    )
+    fallback = "Could not load evaluator jobs right now. Please check the network connection and try again."
+    return next((msg for fragments, msg in rules if any(f in detail for f in fragments)), fallback)
+
+
+def _load_catalog_presets() -> List[Dict[str, str]]:
+    """Return catalog presets from the first ``catalogs.json`` candidate that loads.
+
+    Candidates are tried in order: two directories above this module's folder,
+    ``$CATALOGS_PATH``, then the current working directory. Each preset is the
+    raw entry plus a normalized ``display_name``; entries that are not dicts or
+    have no usable name are dropped. A candidate that fails for any reason is skipped.
+    """
+    import json
+
+    candidates = (
+        Path(__file__).resolve().parents[2] / "catalogs.json",
+        Path(os.environ.get("CATALOGS_PATH", "")),
+        Path.cwd() / "catalogs.json",
+    )
+    for candidate in candidates:
+        if not candidate or not str(candidate):
+            continue
+        try:
+            if not (candidate.exists() and candidate.is_file()):
+                continue
+            with candidate.open("r", encoding="utf-8") as handle:
+                payload = json.load(handle)
+            entries = payload.get("catalogs", []) if isinstance(payload, dict) else payload
+            presets: List[Dict[str, str]] = []
+            for entry in entries or []:
+                if isinstance(entry, dict):
+                    name = str(entry.get("display_name") or entry.get("name") or entry.get("catalog_id") or "").strip()
+                    if name:
+                        presets.append({**entry, "display_name": name})
+            return presets
+        except Exception:
+            continue
+    return []
+
+
+def _retest_catalog_emoji(preset_name: str, *, has_custom_catalog: bool = False) -> str:
+    """Pick the emoji that tags a retest description for the given catalog preset.
+
+    Unknown presets fall back to 🧩 for custom catalogs and 📦 otherwise.
+    """
+    known = {
+        "Build Test Catalog": "🛠️",
+        "Performance Test": "📈",
+        "Old performance test": "🕰️",
+        "Devops Test": "⚙️",
+        "Usecase Performance Catalog": "🧭",
+        "L4 regression test": "⚠️",
+    }
+    fallback = "🧩" if has_custom_catalog else "📦"
+    return known.get(str(preset_name or "").strip(), fallback)
+
+
+def _make_retest_description(target_name: str, preset_name: str = "", *, has_custom_catalog: bool = False) -> str:
+    """Build the description for a retest job: target name, JST ``MM-DD HH:MM`` stamp, catalog emoji."""
+    clean_target = " ".join(str(target_name or "").strip().split()) or "artifact"
+    # A naive datetime.now() would use the server's local zone; this module formats times in JST.
+    stamp = datetime.now(_JST).strftime("%m-%d %H:%M")
+    emoji = _retest_catalog_emoji(preset_name, has_custom_catalog=has_custom_catalog)
+    return f"♻️ evaluator artifact retest [{clean_target}] [{stamp}] {emoji}"
+
+
+def _retest_suite_selection_key(job_id: str) -> str:  # per-job key string for the retest suite selection
+    return f"recent_eval_retest_suite_selection_{job_id}"
+
+
+def _to_jst(dt: Any) -> Optional[datetime]:
+    """Convert a datetime-like value to JST; return ``None`` when that is not possible."""
+    if dt is None or not hasattr(dt, "astimezone"):
+        return None
+    try:
+        # Naive timestamps are interpreted as UTC before shifting to JST.
+        aware = dt.replace(tzinfo=timezone.utc) if getattr(dt, "tzinfo", None) is None else dt
+        return aware.astimezone(_JST)
+    except Exception:
+        # Any conversion failure yields None rather than an error.
+        return None
+
+def _parse_api_dt(value: Any) -> Optional[datetime]:
+    """Parse evaluator API timestamps into timezone-aware datetimes.
+
+    Accepts ``datetime`` objects or ISO-8601 text (a trailing ``Z`` is read as
+    UTC). Naive values are assumed to be UTC. Returns ``None`` for ``None``,
+    blank text, or anything that cannot be parsed.
+    """
+    if value is None:
+        return None
+    if isinstance(value, datetime):
+        parsed = value
+    else:
+        try:
+            text = str(value).strip()
+            if not text:
+                return None
+            parsed = datetime.fromisoformat(text[:-1] + "+00:00" if text.endswith("Z") else text)
+        except Exception:
+            return None
+    return parsed.replace(tzinfo=timezone.utc) if getattr(parsed, "tzinfo", None) is None else parsed
+
+
+def _format_jst_time(value: Any, *, include_seconds: bool = False) -> str:
+    """Render a timestamp as ``YYYY-MM-DD HH:MM[:SS] JST``, or an em dash when it is unusable."""
+    pattern = "%Y-%m-%d %H:%M:%S JST" if include_seconds else "%Y-%m-%d %H:%M JST"
+    jst = _to_jst(_parse_api_dt(value))
+    # A falsy result means the value was missing or could not be parsed/converted.
+    return jst.strftime(pattern) if jst else "—"
+
+
+def _format_jst_time_compact(value: Any) -> str:
+    """Render a timestamp as ``MM-DD HH:MM`` in JST for dense recent-job rows."""
+    jst = _to_jst(_parse_api_dt(value))
+    if jst:
+        return jst.strftime("%m-%d %H:%M")
+    return "—"
+
+
+def _format_jst_time_title(value: Any) -> str:
+    """Render a JST timestamp as ``Y/M/D H:MM:SS`` (unpadded date and hour) for fallback job titles."""
+    jst = _to_jst(_parse_api_dt(value))
+    if jst:
+        return f"{jst.year}/{jst.month}/{jst.day} {jst.hour}:{jst.minute:02d}:{jst.second:02d}"
+    return "unknown time"
+
+
+def _format_relative_time(value: Any) -> str:
+    """Describe how long ago a timestamp was, using the largest fitting unit (s/m/h/d)."""
+    parsed = _parse_api_dt(value)
+    if not parsed:
+        return "—"
+    elapsed = datetime.now(timezone.utc) - parsed.astimezone(timezone.utc)
+    # Future timestamps are clamped so they read as "0s ago".
+    secs = max(0, int(elapsed.total_seconds()))
+    # Units from largest to smallest; the first one that fits is used.
+    units = ((86400, "d"), (3600, "h"), (60, "m"))
+    for unit_secs, suffix in units:
+        if secs >= unit_secs:
+            return f"{secs // unit_secs}{suffix} ago"
+    return f"{secs}s ago"
+
+
+def _format_duration(start_value: Any, end_value: Any) -> str:
+    """Format the elapsed time between two evaluator timestamps as ``Ns``, ``Nm Ns`` or ``Nh Nm``."""
+    start, end = _parse_api_dt(start_value), _parse_api_dt(end_value)
+    if not (start and end):
+        return "—"
+    secs = max(0, int((end - start).total_seconds()))
+    minutes, rem_secs = divmod(secs, 60)
+    if minutes == 0:
+        return f"{secs}s"
+    if minutes < 60:
+        return f"{minutes}m {rem_secs}s"
+    return f"{minutes // 60}h {minutes % 60}m"
+
+
+def _extract_git_target(report: Dict[str, Any]) -> str:
+    """Return the branch/tag name from a job report, else a 12-char SHA prefix, else an em dash."""
+    source = (report.get("event") or {}).get("source") or {}
+    git_ref = str(source.get("git_ref") or "").strip()
+    # Strip the ref namespace so only the branch or tag name is shown.
+    for prefix in ("refs/heads/", "refs/tags/"):
+        if git_ref.startswith(prefix):
+            return git_ref[len(prefix):]
+    return git_ref or str(source.get("git_sha") or "").strip()[:12] or "—"
+
+
+def _extract_catalog_url(report: Dict[str, Any]) -> str:
+    """Return a best-effort catalog URL for linking from recent evaluator jobs.
+
+    Prefers an explicit URL on the catalog; otherwise builds one from the project and catalog ids.
+    """
+    catalog = report.get("catalog") or {}
+
+    def _first_text(*keys: str) -> str:
+        for key in keys:
+            if catalog.get(key):
+                return str(catalog[key]).strip()
+        return ""
+
+    direct_url = _first_text("web_url", "url", "catalog_url")
+    if direct_url:
+        return direct_url
+    project_id = str(report.get("project_id") or "").strip()
+    catalog_id = _first_text("catalog_id", "id")
+    if not (project_id and catalog_id):
+        return ""
+    return f"https://evaluation.tier4.jp/evaluation/vehicle_catalogs/{catalog_id}?project_id={project_id}"
+
+
+def _extract_job_title(report: Dict[str, Any]) -> str:
+    """Return the job description, or a "no description (Started at ...)" placeholder when it is blank."""
+    title = str(report.get("description") or "").strip()
+    if not title:
+        when = report.get("started_at") or report.get("scheduled_at") or report.get("finished_at")
+        title = f"no description (Started at {_format_jst_time_title(when)})"
+    return title
+
+
+def _extract_case_totals(report: Dict[str, Any]) -> Dict[str, int]:
+    """Return total/success/failed/canceled counts from job report.
+
+    ``test.available_case_results`` is preferred over ``test.case_results``; absent counts are 0.
+    """
+    test = report.get("test") or {}
+    counts = test.get("available_case_results") or test.get("case_results") or {}
+    fields = (("total", "total_count"), ("success", "success_count"),
+              ("failed", "failure_count"), ("canceled", "cancellation_count"))
+    return {label: int(counts.get(key, 0) or 0) for label, key in fields}
+
+
+def _extract_failed_case_rows(case_reports: List[Dict[str, Any]], *, limit: int = 50) -> List[Dict[str, Any]]:
+    """Normalize failed case reports into display-table rows, sorted and capped at ``limit``."""
+    rows: List[Dict[str, Any]] = []
+    for report in case_reports:
+        status = str(report.get("status") or "").strip().lower()
+        result_status = str(((report.get("result") or {}).get("status") or "")).strip().lower()
+        # Keep the case when either its status or its result status is a failed job status.
+        if status not in evaluator_api.FAILED_JOB_STATUSES and result_status not in evaluator_api.FAILED_JOB_STATUSES:
+            continue
+        logs = report.get("logs") or {}
+        rows.append({
+            "Suite": ((report.get("suite") or {}).get("display_name") or ""),
+            "Scenario": ((report.get("scenario") or {}).get("display_name") or ""),
+            "Status": report.get("status", ""),
+            "Fail message": report.get("fail_message", ""),
+            "Cause": ", ".join(report.get("failure_cause_labels", []) or []),
+            "Archive log": "yes" if ((logs.get("simulation_archive") or {}).get("id")) else "no",
+            "Result JSON": "yes" if ((logs.get("simulation_result_json") or {}).get("id")) else "no",
+        })
+    # Cells may be None (e.g. a null fail_message); compare as text so sorting never raises TypeError.
+    rows.sort(key=lambda row: tuple(str(row[col] or "") for col in ("Suite", "Scenario", "Fail message")))
+    return rows[:limit]
+
+
+def _extract_suite_rows(suite_rows: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+    """Normalize suite summary rows for display tables: most failures first, then by suite name."""
+    rows = [
+        {
+            "Suite": row.get("name", ""),
+            "Total": int(row.get("all", 0) or 0),
+            "Success": int(row.get("success", 0) or 0),
+            "Failed": int(row.get("fail", 0) or 0),
+            "Canceled": int(row.get("cancel", 0) or 0),
+            "Simulation": row.get("simulation", ""),
+            "Report": row.get("url", ""),
+        }
+        for row in suite_rows or []
+    ]
+    rows.sort(key=lambda row: (-row["Failed"], str(row["Suite"] or "")))  # null-safe: a name may be None
+    return rows
+
+
+def _extract_suite_selection_options(suite_rows: List[Dict[str, Any]]) -> List[Dict[str, str]]:
+    """Build suite picker options from evaluator suite summary rows.
+
+    The suite id is the path segment after ``/tests/`` in the row's report URL;
+    rows without one, and repeated ids, are skipped.
+    """
+    # Dicts keep insertion order, so options follow the first appearance of each id.
+    by_id: Dict[str, Dict[str, str]] = {}
+    for row in suite_rows or []:
+        report_url = str(row.get("url") or row.get("Report") or "").strip()
+        _, marker, tail = report_url.partition("/tests/")
+        suite_id = tail.partition("?")[0].partition("/")[0].strip() if marker else ""
+        if suite_id and suite_id not in by_id:
+            suite_name = str(row.get("name") or row.get("Suite") or suite_id).strip()
+            by_id[suite_id] = {"id": suite_id, "label": f"{suite_name} ({suite_id})"}
+    return list(by_id.values())
+
+
+def _short_git_sha(sha: str, *, length: int = 8) -> str:  # first `length` (at least 1) characters of the stripped SHA
+    return str(sha or "").strip()[: max(1, int(length))]
+
+
+def _format_source_ref_text(source_label: str, git_sha: str) -> str:
+    """Format ``label (short_sha)`` as plain text, degrading to whichever part exists or an em dash."""
+    label, short_sha = str(source_label or "").strip(), _short_git_sha(git_sha)
+    if not (label and short_sha):
+        return label or short_sha or "—"
+    return f"{label} ({short_sha})"
+
+
+def _format_source_ref_html(
+    source_label: str,
+    source_url: str,
+    git_sha: str,
+    git_commit_url: str,
+) -> str:
+    """Format ``label (short_sha)`` as HTML, linking each part when its URL is known.
+
+    Every interpolated value is HTML-escaped. A missing label renders as an em
+    dash, which is dropped in favour of the SHA alone when a SHA is available.
+    """
+
+    def _anchor(url: str, text: str) -> str:
+        return f'<a href="{url}" target="_blank" rel="noopener noreferrer">{text}</a>'
+
+    label = html.escape(str(source_label or "").strip() or "—")
+    ref_url = html.escape(str(source_url or "").strip())
+    short_sha = html.escape(_short_git_sha(git_sha))
+    commit_url = html.escape(str(git_commit_url or "").strip())
+
+    label_html = _anchor(ref_url, label) if ref_url and label != "—" else label
+    if not short_sha:
+        return label_html
+    sha_html = _anchor(commit_url, short_sha) if commit_url else short_sha
+    if label_html == "—":
+        return sha_html
+    return f"{label_html} ({sha_html})"
+
+
+def _extract_retest_parent_job_id(report: Dict[str, Any]) -> str:
+    """Return the upstream source_job_id when this evaluator job was itself a retest.
+
+    Tries ``event.source_job_id``, ``event.source_job.id``, then ``source_job_id``; ``""`` if none is set.
+    """
+    event = report.get("event") or {}
+    source_job = event.get("source_job")
+    nested_id = source_job.get("id") if isinstance(source_job, dict) else ""
+    for candidate in (event.get("source_job_id"), nested_id, report.get("source_job_id")):
+        text = str(candidate or "").strip()
+        if text:
+            return text
+    return ""
+
+
+def _resolve_retest_source_job_id(
+    project_id: str,
+    environment: str,
+    job_id: str,
+    *,
+    detail: Optional[Dict[str, Any]] = None,  # already-fetched detail for job_id; used for the first hop
+    max_depth: int = 5,  # upper bound on the number of jobs visited
+) -> str:
+    """Unwrap retest chains so scheduling reuses the earliest known source job."""
+    current_job_id = str(job_id or "").strip()
+    current_detail = detail or {}
+    seen_job_ids: set[str] = set()
+
+    while current_job_id and current_job_id not in seen_job_ids and len(seen_job_ids) < max_depth:  # stops on cycles too
+        seen_job_ids.add(current_job_id)
+        raw_report = current_detail.get("raw_report") if isinstance(current_detail, dict) else {}
+        parent_job_id = _extract_retest_parent_job_id(raw_report or {})
+        if not parent_job_id or parent_job_id in seen_job_ids:
+            return current_job_id  # no parent, or the parent was already visited
+        current_job_id = parent_job_id
+        try:
+            current_detail = _fetch_evaluator_job_detail(project_id, environment, current_job_id)
+        except Exception:
+            return current_job_id  # parent detail could not be fetched: return the parent id itself
+
+    return current_job_id or str(job_id or "").strip()
+
+
+def _status_color_variant(status: str) -> str:
+    """Map evaluator status to a style token used by the recent-job cards."""
+    normalized = evaluator_api.normalize_job_status(status)
+    # Order matters: cancel-like statuses are checked before FAILED_JOB_STATUSES,
+    # so they keep the "canceled" token in case that collection also lists them.
+    checks = (
+        ("success", evaluator_api.SUCCESS_JOB_STATUSES),
+        ("canceled", ("canceled", "cancelled", "aborted")),
+        ("failed", evaluator_api.FAILED_JOB_STATUSES),
+        ("running", ("started", "running", "pending", "queued", "created")),
+    )
+    return next((token for token, members in checks if normalized in members), "unknown")
+
+
+def _status_display_label(status: str) -> str:
+    """Short status label for compact list rows.
+
+    Collapses status synonyms to success/failed/canceled/running/queued; other values pass through.
+    """
+    normalized = evaluator_api.normalize_job_status(status)
+    aliases = {
+        "succeeded": "success", "success": "success",
+        "failed": "failed", "failure": "failed", "error": "failed",
+        "canceled": "canceled", "cancelled": "canceled", "aborted": "canceled",
+        "started": "running", "running": "running",
+        "pending": "queued", "queued": "queued", "created": "queued",
+    }
+    return aliases.get(normalized) or normalized or "unknown"
+
+
+def _status_filter_values(selected_statuses: List[str]) -> List[str]:
+    """Normalize UI status filters into API status values.
+
+    Each UI label expands to every API spelling of that state; the result is
+    de-duplicated and sorted. Empty and ``unknown`` selections are dropped.
+    """
+    expansions = {
+        "running": ("running", "started"),
+        "success": ("success", "succeeded"),
+        "failed": ("failed", "failure", "error"),
+        "canceled": ("canceled", "cancelled", "aborted"),
+    }
+    values: set = set()
+    for raw in selected_statuses:
+        normalized = evaluator_api.normalize_job_status(raw)
+        if normalized and normalized != "unknown":
+            values.update(expansions.get(normalized, (normalized,)))
+    return sorted(values)
+
+
+def _escape_search_match_value(value: str) -> str:
+    """Escape wildcard characters for API Match filters."""
+    # One pass over the text: prefix each backslash, ``*`` and ``?`` with a backslash.
+    specials = "\\*?"
+    return "".join(
+        "\\" + ch if ch in specials else ch for ch in value
+    )
+
+
+def _build_recent_job_search_filter(
+    search_text: str,
+    search_scope: str,
+    user_directory: Optional[Dict[str, Dict[str, str]]] = None,
+) -> tuple[Optional[Dict[str, Any]], str]:
+    """Map quick-search UI to one server-side filter and a client-side needle.
+
+    Args:
+        search_text: Raw text typed by the user; surrounding whitespace is ignored.
+        search_scope: UI scope label, handled as follows:
+
+            * ``Branch/tag`` - wildcard ``Match`` on ``event.source.git_ref``
+            * ``Description`` - wildcard ``Match`` on ``description``
+            * ``Git SHA`` - wildcard ``Match`` on ``event.source.git_sha``
+            * ``Fail message`` - wildcard ``Match`` on ``fail_message``
+            * ``Job ID`` - exact ``In`` on ``job_id``
+            * anything else - no server-side filter
+        user_directory: Not used by this function.
+
+    Returns:
+        ``(server_filter, needle)``. ``server_filter`` is ``None`` when the
+        text is blank or the scope has no server-side mapping. ``needle`` is
+        the lower-cased text, or ``""`` when the text is blank.
+    """
+    needle = search_text.strip()
+    if not needle:
+        return None, ""
+    lowered = needle.lower()
+
+    # Scopes answered with a "*text*" Match on a single report field.
+    # The text is passed through _escape_search_match_value before being wrapped.
+    match_fields = {
+        "Branch/tag": "event.source.git_ref",
+        "Description": "description",
+        "Git SHA": "event.source.git_sha",
+        "Fail message": "fail_message",
+    }
+    if search_scope in match_fields:
+        server_filter = {
+            "field": match_fields[search_scope],
+            "operator": "Match",
+            "values": [f"*{_escape_search_match_value(needle)}*"],
+        }
+        return server_filter, lowered
+
+    # Job ids use the In operator, so the needle is sent as typed (no wildcards, no escaping).
+    if search_scope == "Job ID":
+        server_filter = {
+            "field": "job_id",
+            "operator": "In",
+            "values": [needle],
+        }
+        return server_filter, lowered
+
+    # Any other scope has no server-side filter; only the needle is returned.
+    return None, lowered
+
+
+def _recent_job_search_history_key(scope: str) -> str:  # config key under which a scope's search history is stored
+    return f"recent_eval_jobs_search_history::{scope}"
+
+
+def _get_recent_job_search_history(scope: str) -> List[str]:
+    """Return the stored search history for ``scope`` as non-blank, stripped strings."""
+    stored = get_config_value(_recent_job_search_history_key(scope), []) or []
+    entries = stored if isinstance(stored, list) else []
+    return [text for text in (str(v).strip() for v in entries) if text]
+
+
+def _save_recent_job_search_history(scope: str, value: str, *, max_items: int = 8) -> None:
+    """Move ``value`` to the front of the scope's search history, keeping at most ``max_items``."""
+    text = str(value).strip()
+    if text:
+        # Drop any older occurrence so the entry appears once, at the front.
+        older = [item for item in _get_recent_job_search_history(scope) if item != text]
+        set_config_value(_recent_job_search_history_key(scope), ([text] + older)[:max_items])
+
+
+def _get_recent_eval_user_directory() -> Dict[str, Dict[str, str]]:
+    """Load the stored subject-id to profile mapping, keeping only dict entries with stripped fields."""
+    stored = get_config_value("recent_eval_jobs_user_directory", {}) or {}
+    if not isinstance(stored, dict):
+        return {}
+    return {
+        str(subject_id): {
+            "name": str(info.get("name") or "").strip(),
+            "email": str(info.get("email") or "").strip(),
+            "subject_id": str(info.get("subject_id") or subject_id).strip(),
+        }
+        for subject_id, info in stored.items()
+        if isinstance(info, dict)
+    }
+
+
+def _save_recent_eval_user_directory(directory: Dict[str, Dict[str, str]]) -> None:
+    """Write the subject-id -> profile mapping to the config store."""
+    config_key = "recent_eval_jobs_user_directory"
+    set_config_value(config_key, directory)
+
+
+@st.cache_data(ttl=24 * 3600, show_spinner=False)
+def _fetch_auth_member_profile(subject_id: str, environment: str) -> Dict[str, str]:
+    """Look up one organization member's profile from the auth service.
+
+    Wrapped in ``st.cache_data`` with a 24-hour TTL.
+
+    Args:
+        subject_id: Member identifier; blank values short-circuit to ``{}``.
+        environment: Value written to ``AUTH_PROFILE``; ``ENVIRONMENT`` is
+            used when this is empty.
+
+    Returns:
+        A dict with ``subject_id``, ``name`` and ``email``. When
+        ``WEBAUTO_ORGANIZATION_ID`` is set but blank, a placeholder that uses
+        the subject id as the name is returned without any network call.
+
+    Raises:
+        Whatever token acquisition or ``requests`` raises, including
+        ``requests.HTTPError`` from ``raise_for_status()``.
+    """
+    member_id = str(subject_id or "").strip()
+    if not member_id:
+        return {}
+    organization = os.environ.get(
+        "WEBAUTO_ORGANIZATION_ID",
+        "5a21621d-6968-4f7d-94f8-99cfb77b6e71",
+    ).strip()
+    if not organization:
+        return {"subject_id": member_id, "name": member_id, "email": ""}
+
+    # Process-wide side effect: the environment variable is set before the
+    # auth library is imported and its config loaded.
+    # NOTE(review): presumably load_config() reads AUTH_PROFILE - confirm.
+    os.environ["AUTH_PROFILE"] = environment or ENVIRONMENT
+    from webautoauth.token import HttpService, TokenSource, load_config
+
+    bearer = TokenSource(HttpService(load_config())).get_token().access_token
+    member_path = urllib.parse.quote(member_id, safe="")
+    reply = requests.get(
+        f"https://auth.web.auto/v2/organizations/{organization}/members/{member_path}",
+        headers={"Authorization": f"Bearer {bearer}", "accept": "application/json"},
+        timeout=10,
+    )
+    reply.raise_for_status()
+    payload = reply.json()
+    resolved_id = str(payload.get("subject_id") or member_id)
+    display_name = str(payload.get("name") or member_id).strip()
+    email = str(payload.get("email") or "").strip()
+    return {"subject_id": resolved_id, "name": display_name, "email": email}
+
+
+def _hydrate_recent_eval_user_directory(
+    jobs: List[Dict[str, Any]],
+    environment: str,
+) -> Dict[str, Dict[str, str]]:
+    """Ensure the user directory has an entry for every ``scheduled_by`` id in ``jobs``.
+
+    Ids missing from the persisted directory are fetched concurrently (at most
+    six worker threads) through ``_fetch_auth_member_profile``. The merged
+    directory is saved and returned; when nothing is missing the stored
+    directory is returned untouched.
+
+    A lookup that raises is replaced by a placeholder whose name is the raw
+    subject id. That placeholder is merged and persisted like any other
+    entry, so the id is not looked up again on later calls.
+    """
+    known = _get_recent_eval_user_directory()
+    requesters = {str(job.get("scheduled_by") or "").strip() for job in jobs}
+    missing = sorted(sid for sid in requesters if sid and sid not in known)
+    if not missing:
+        return known
+
+    fetched: Dict[str, Dict[str, str]] = {}
+    worker_count = min(6, len(missing))
+    with ThreadPoolExecutor(max_workers=worker_count) as pool:
+        pending = {}
+        for sid in missing:
+            pending[pool.submit(_fetch_auth_member_profile, sid, environment)] = sid
+        for done in as_completed(pending):
+            sid = pending[done]
+            try:
+                profile = done.result()
+            except Exception:
+                # Best effort: show the raw id instead of failing the page.
+                profile = {"subject_id": sid, "name": sid, "email": ""}
+            fetched[sid] = {
+                "subject_id": str(profile.get("subject_id") or sid).strip(),
+                "name": str(profile.get("name") or sid).strip(),
+                "email": str(profile.get("email") or "").strip(),
+            }
+
+    if not fetched:
+        return known
+    merged = dict(known)
+    merged.update(fetched)
+    _save_recent_eval_user_directory(merged)
+    return merged
+
+
+def _build_recent_job_date_filters(
+    date_from: Optional[datetime.date],
+    date_to: Optional[datetime.date],
+) -> List[Dict[str, Any]]:
+    """Build ``scheduled_at`` date-range filters for the search API.
+
+    ``date_from`` becomes a ``Gte`` filter at 00:00:00 of that day and
+    ``date_to`` an ``Lte`` filter at 23:59:59 of that day. Both wall-clock
+    times are taken in the ``_JST`` timezone and sent as UTC ISO-8601
+    strings. A falsy bound produces no filter.
+    """
+    bounds = (
+        ("Gte", date_from, (0, 0, 0)),
+        ("Lte", date_to, (23, 59, 59)),
+    )
+    filters: List[Dict[str, Any]] = []
+    for operator, day, clock in bounds:
+        if not day:
+            continue
+        local_dt = datetime(day.year, day.month, day.day, *clock, tzinfo=_JST)
+        utc_text = local_dt.astimezone(timezone.utc).isoformat()
+        filters.append({"field": "scheduled_at", "operator": operator, "values": [utc_text]})
+    return filters
+
+
+def _summarize_recent_job(report: Dict[str, Any]) -> Dict[str, Any]:
+    """Flatten one evaluator report into the summary dict used by job cards.
+
+    Missing or null nested sections (``event.source``, ``build``, ``test``,
+    ``catalog``) are treated as empty dicts. ``git_sha`` is cut to its first
+    12 characters, and the repository label is the last path segment of the
+    source URL (``"—"`` when there is no URL).
+    """
+
+    def _section(name: str) -> Dict[str, Any]:
+        # A section that is absent or null is read as an empty mapping.
+        return report.get(name) or {}
+
+    job_id = report.get("job_id") or report.get("id") or ""
+    scheduled_at = report.get("scheduled_at")
+    started_at = report.get("started_at")
+    finished_at = report.get("finished_at")
+
+    status = evaluator_api.extract_job_status(report)
+    totals = _extract_case_totals(report)
+    source = _section("event").get("source") or {}
+    git_url = str(source.get("git_web_url") or source.get("git_url") or "").strip()
+    repo_name = git_url.rstrip("/").rsplit("/", 1)[-1] if git_url else "—"
+    git_ref = _extract_git_target(report)
+
+    return {
+        "job_id": job_id,
+        "title": _extract_job_title(report),
+        "status": status,
+        "status_variant": _status_color_variant(status),
+        "build_status": _section("build").get("status") or "",
+        "test_status": _section("test").get("status") or "",
+        "target": git_ref,
+        "catalog": _section("catalog").get("display_name") or "",
+        "catalog_url": _extract_catalog_url(report),
+        "description": report.get("description", ""),
+        "source_label": git_ref,
+        "source_repo_label": repo_name,
+        "scheduled_at": scheduled_at,
+        "started_at": started_at,
+        "finished_at": finished_at,
+        "duration": _format_duration(started_at, finished_at),
+        "created_label": _format_relative_time(scheduled_at or started_at),
+        "scheduled_by": str(report.get("scheduled_by") or ""),
+        "report_url": evaluator_api.get_job_report_url(report.get("project_id", ""), job_id),
+        "fail_message": report.get("fail_message", ""),
+        "total": totals["total"],
+        "success": totals["success"],
+        "failed": totals["failed"],
+        "canceled": totals["canceled"],
+        "git_sha": str(source.get("git_sha") or "")[:12],
+        "git_ref_url": source.get("git_ref_url", ""),
+        "git_commit_url": source.get("git_commit_url", ""),
+        "source_url": git_url,
+    }
+
+
+@st.cache_data(ttl=30, show_spinner=False)
+def _fetch_recent_evaluator_job_pages(
+    project_id: str,
+    environment: str,
+    page_size: int,
+    pages_to_fetch: int,
+    status_values: tuple[str, ...] = (),
+    extra_filters: tuple[tuple[str, str, tuple[Any, ...]], ...] = (),
+) -> List[Dict[str, Any]]:
+    """Fetch recent evaluator jobs from the search endpoint page-by-page.
+
+    Wrapped in ``st.cache_data`` with a 30-second TTL.
+
+    Args:
+        project_id: Project to search; an empty value returns ``[]``.
+        environment: Value written to ``AUTH_PROFILE`` (``ENVIRONMENT`` when
+            empty).
+        page_size: Requested page size, clamped to the range 1..100.
+        pages_to_fetch: Maximum number of pages; at least one is requested.
+        status_values: When non-empty, adds a ``status`` ``In`` filter.
+        extra_filters: ``(field, operator, values)`` triples appended as
+            further filters.
+
+    Returns:
+        One dict per fetched page with ``jobs`` (summarized reports) and
+        ``next_token``. Fetching stops early when a page has no next token.
+    """
+    if not project_id:
+        return []
+    os.environ["AUTH_PROFILE"] = environment or ENVIRONMENT
+    client = evaluator_api.EvaluationRunAPI()
+
+    query_filters: List[Dict[str, Any]] = []
+    if status_values:
+        query_filters.append({"field": "status", "operator": "In", "values": list(status_values)})
+    query_filters.extend(
+        {"field": field, "operator": operator, "values": list(values)}
+        for field, operator, values in extra_filters
+    )
+
+    # Loop-invariant request arguments, computed in the same order the loop
+    # would first evaluate them.
+    page_budget = max(1, int(pages_to_fetch))
+    clamped_size = max(1, min(int(page_size), 100))
+    filters_arg = query_filters or None
+
+    collected: List[Dict[str, Any]] = []
+    token = ""
+    for _ in range(page_budget):
+        payload = client.search_report_list(
+            project_id,
+            filters=filters_arg,
+            next_token=token,
+            size=clamped_size,
+        )
+        token = payload.get("next_token", "") or ""
+        summaries = [_summarize_recent_job(item) for item in (payload.get("reports", []) or [])]
+        collected.append({"jobs": summaries, "next_token": token})
+        if not token:
+            break
+    return collected
+
+
+@st.cache_data(ttl=30, show_spinner=False)
+def _fetch_evaluator_job_detail(project_id: str, environment: str, job_id: str) -> Dict[str, Any]:
+    """Fetch the full evaluator detail for one job on demand.
+
+    Wrapped in ``st.cache_data`` with a 30-second TTL. Returns ``{}`` when
+    either id is empty. Otherwise the result is the job summary extended with
+    ``suite_rows``, ``failed_case_rows`` and the unmodified ``raw_report``.
+    """
+    if not (project_id and job_id):
+        return {}
+    os.environ["AUTH_PROFILE"] = environment or ENVIRONMENT
+    client = evaluator_api.EvaluationRunAPI()
+    raw_report = client.get_job_report(project_id, job_id)
+    suites = client.get_suite_summary(project_id, job_id, use_available_case_results=True)
+    cases = client.get_case_reports(project_id, job_id)
+
+    detail = dict(_summarize_recent_job(raw_report))
+    detail["suite_rows"] = _extract_suite_rows(suites)
+    detail["failed_case_rows"] = _extract_failed_case_rows(cases)
+    detail["raw_report"] = raw_report
+    return detail
+
+
+def _inject_recent_evaluator_jobs_styles() -> None:
+    """Emit the CSS used by the recent evaluator jobs section.
+
+    A single ``<style>`` element is written through ``st.markdown`` with
+    ``unsafe_allow_html=True``. It defines the ``evj-*`` classes (card, row
+    grid, title, status pill and mark, cells, stats, detail panel, empty
+    state) and restyles buttons inside elements whose class contains one of
+    the ``st-key-recent_eval_*`` / ``st-key-refresh_recent_eval_jobs`` keys.
+    At viewport widths up to 1080px the row grid collapses to one column.
+    """
+    # The stylesheet is a runtime string: it carries no placeholders and is
+    # passed to Streamlit exactly as written below.
+    st.markdown(
+        """
+        <style>
+        .evj-card {
+            border-radius: 16px;
+            padding: 0.7rem 0.85rem;
+            border: 1px solid rgba(148, 163, 184, 0.22);
+            background: rgba(255, 255, 255, 0.92);
+            box-shadow: 0 8px 20px rgba(15, 23, 42, 0.05);
+        }
+        .evj-card--running {
+            border-color: rgba(245, 158, 11, 0.28);
+            background: linear-gradient(180deg, rgba(255, 251, 235, 0.98), rgba(255,255,255,0.98));
+        }
+        .evj-card--success {
+            border-color: rgba(16, 185, 129, 0.24);
+            background: linear-gradient(180deg, rgba(236, 253, 245, 0.98), rgba(255,255,255,0.98));
+        }
+        .evj-card--failed {
+            border-color: rgba(239, 68, 68, 0.24);
+            background: linear-gradient(180deg, rgba(254, 242, 242, 0.98), rgba(255,255,255,0.98));
+        }
+        .evj-top, .evj-meta, .evj-stats {
+            display: flex;
+            align-items: center;
+            gap: 8px;
+            flex-wrap: wrap;
+        }
+        .evj-top { justify-content: space-between; }
+        .evj-row {
+            display: grid;
+            grid-template-columns: minmax(180px, 1.25fr) minmax(86px, 0.48fr) minmax(172px, 0.95fr) minmax(170px, 1.05fr) minmax(120px, 0.8fr) minmax(160px, 1fr);
+            gap: 8px;
+            align-items: center;
+        }
+        .evj-title {
+            font-size: 0.9rem;
+            font-weight: 800;
+            color: #0f172a;
+            margin: 0;
+            word-break: break-word;
+        }
+        .evj-title a {
+            color: inherit;
+            text-decoration: none;
+        }
+        .evj-title a:hover {
+            text-decoration: underline;
+        }
+        .evj-name {
+            min-width: 0;
+        }
+        .evj-name .evj-title,
+        .evj-name .evj-name-sub,
+        .evj-ref-cell,
+        .evj-ref-cell .evj-name-sub {
+            white-space: nowrap;
+            overflow: hidden;
+            text-overflow: ellipsis;
+        }
+        .evj-name-sub {
+            margin-top: 0.15rem;
+            font-size: 0.74rem;
+            color: #64748b;
+        }
+        .evj-name-sub a {
+            color: inherit;
+            text-decoration: none;
+        }
+        .evj-name-sub a:hover {
+            text-decoration: underline;
+        }
+        .evj-status {
+            display: inline-flex;
+            align-items: center;
+            gap: 5px;
+            padding: 0.24rem 0.5rem;
+            border-radius: 999px;
+            font-size: 0.7rem;
+            font-weight: 800;
+            text-transform: lowercase;
+            letter-spacing: 0.01em;
+            border: 1px solid transparent;
+        }
+        .evj-status--running { color: #9a6700; background: #fff7db; border-color: rgba(245, 158, 11, 0.28); }
+        .evj-status--success { color: #047857; background: #dcfce7; border-color: rgba(16, 185, 129, 0.28); }
+        .evj-status--failed { color: #b91c1c; background: #fee2e2; border-color: rgba(239, 68, 68, 0.28); }
+        .evj-status--canceled { color: #7c3aed; background: #f3e8ff; border-color: rgba(124, 58, 237, 0.24); }
+        .evj-status--unknown { color: #475569; background: #f1f5f9; border-color: rgba(148, 163, 184, 0.28); }
+        .evj-status-mark {
+            display: inline-flex;
+            align-items: center;
+            justify-content: center;
+            width: 14px;
+            height: 14px;
+            border-radius: 999px;
+            font-size: 0.62rem;
+            font-weight: 900;
+            line-height: 1;
+            border: 1px solid currentColor;
+            flex: 0 0 auto;
+        }
+        .evj-status-mark--success {
+            background: rgba(4, 120, 87, 0.08);
+        }
+        .evj-status-mark--failed {
+            background: rgba(185, 28, 28, 0.08);
+        }
+        .evj-status-mark--canceled {
+            background: rgba(124, 58, 237, 0.08);
+        }
+        .evj-status-mark--unknown {
+            background: rgba(71, 85, 105, 0.08);
+        }
+        .evj-status-mark--running {
+            position: relative;
+            border-radius: 999px;
+            border: 1.5px solid rgba(154, 103, 0, 0.18);
+            border-top-color: currentColor;
+            border-right-color: currentColor;
+            background: transparent;
+            animation: evj-spin 0.9s linear infinite;
+        }
+        .evj-dot {
+            width: 8px;
+            height: 8px;
+            border-radius: 999px;
+            display: inline-block;
+            background: currentColor;
+            opacity: 0.88;
+        }
+        .evj-dot--pulse {
+            animation: evj-pulse 1.4s ease-in-out infinite;
+        }
+        @keyframes evj-pulse {
+            0% { transform: scale(0.9); opacity: 0.55; }
+            50% { transform: scale(1.2); opacity: 1; }
+            100% { transform: scale(0.9); opacity: 0.55; }
+        }
+        @keyframes evj-spin {
+            0% { transform: rotate(0deg); }
+            100% { transform: rotate(360deg); }
+        }
+        .evj-meta {
+            color: #475569;
+            font-size: 0.82rem;
+        }
+        .evj-list {
+            display: flex;
+            flex-direction: column;
+            gap: 8px;
+            margin-top: 0.7rem;
+        }
+        .evj-toolbar-note {
+            margin: 0.15rem 0 0.35rem;
+            font-size: 0.72rem;
+            font-weight: 700;
+            letter-spacing: 0.02em;
+            color: #64748b;
+            text-transform: uppercase;
+        }
+        .evj-pager-note {
+            margin-top: 0.28rem;
+            font-size: 0.76rem;
+            color: #475569;
+            white-space: nowrap;
+        }
+        .evj-cell {
+            min-width: 0;
+            font-size: 0.78rem;
+            color: #334155;
+        }
+        .evj-cell a {
+            color: #0f766e;
+            text-decoration: none;
+            font-weight: 700;
+        }
+        .evj-cell a:hover {
+            text-decoration: underline;
+        }
+        .evj-cell strong {
+            color: #0f172a;
+        }
+        .evj-cell--nowrap {
+            white-space: nowrap;
+        }
+        .evj-detail {
+            margin-top: 1rem;
+            padding: 1rem 1rem 0.8rem;
+            border-radius: 18px;
+            border: 1px solid rgba(15, 118, 110, 0.14);
+            background:
+                radial-gradient(circle at top right, rgba(45, 212, 191, 0.10), transparent 24%),
+                linear-gradient(180deg, rgba(255,255,255,0.99), rgba(247,250,252,0.99));
+            box-shadow: 0 14px 30px rgba(15, 23, 42, 0.06);
+        }
+        .evj-stat {
+            flex: 1 1 80px;
+            min-width: 72px;
+            padding: 0.55rem 0.7rem;
+            border-radius: 14px;
+            background: rgba(248, 250, 252, 0.92);
+            border: 1px solid rgba(148, 163, 184, 0.16);
+        }
+        .evj-inline-stats {
+            display: flex;
+            flex-wrap: wrap;
+            gap: 6px;
+            font-size: 0.76rem;
+            color: #334155;
+        }
+        [class*="st-key-recent_eval_view_"] button,
+        [class*="st-key-recent_eval_run_"] button,
+        [class*="st-key-recent_eval_retest_"] button,
+        [class*="st-key-recent_eval_jobs_prev"] button,
+        [class*="st-key-recent_eval_jobs_next"] button,
+        [class*="st-key-recent_eval_jobs_pagebtn_"] button,
+        [class*="st-key-refresh_recent_eval_jobs"] button {
+            min-height: 2rem;
+            padding: 0.18rem 0.58rem;
+            border-radius: 999px;
+            font-size: 0.72rem;
+            font-weight: 700;
+            box-shadow: none;
+        }
+        [class*="st-key-recent_eval_view_"] button,
+        [class*="st-key-recent_eval_retest_"] button,
+        [class*="st-key-recent_eval_jobs_prev"] button,
+        [class*="st-key-recent_eval_jobs_next"] button,
+        [class*="st-key-recent_eval_jobs_pagebtn_"] button,
+        [class*="st-key-refresh_recent_eval_jobs"] button {
+            border-color: rgba(148, 163, 184, 0.34);
+            color: #334155;
+            background: #ffffff;
+        }
+        [class*="st-key-recent_eval_view_"] button:hover,
+        [class*="st-key-recent_eval_retest_"] button:hover,
+        [class*="st-key-recent_eval_jobs_prev"] button:hover,
+        [class*="st-key-recent_eval_jobs_next"] button:hover,
+        [class*="st-key-recent_eval_jobs_pagebtn_"] button:hover,
+        [class*="st-key-refresh_recent_eval_jobs"] button:hover {
+            border-color: rgba(15, 118, 110, 0.28);
+            color: #0f766e;
+            background: #f8fffd;
+        }
+        [class*="st-key-recent_eval_jobs_pagebtn_active_"] button {
+            border-color: rgba(13, 148, 136, 0.26);
+            background: linear-gradient(180deg, #f0fdfa, #ecfeff);
+            color: #0f766e;
+        }
+        [class*="st-key-recent_eval_run_"] button {
+            border-color: rgba(13, 148, 136, 0.22);
+            background: linear-gradient(180deg, #f0fdfa, #ecfeff);
+            color: #0f766e;
+        }
+        [class*="st-key-recent_eval_run_"] button:hover {
+            border-color: rgba(13, 148, 136, 0.34);
+            background: linear-gradient(180deg, #ccfbf1, #ecfeff);
+            color: #115e59;
+        }
+        [class*="st-key-recent_eval_retest_"] button {
+            border-color: rgba(251, 191, 36, 0.22);
+            background: linear-gradient(180deg, #fffbeb, #fff7ed);
+            color: #b45309;
+        }
+        [class*="st-key-recent_eval_retest_"] button:hover {
+            border-color: rgba(245, 158, 11, 0.34);
+            background: linear-gradient(180deg, #fef3c7, #fff7ed);
+            color: #92400e;
+        }
+        .evj-stat-label {
+            display: block;
+            font-size: 0.68rem;
+            letter-spacing: 0.06em;
+            text-transform: uppercase;
+            color: #64748b;
+            font-weight: 800;
+            margin-bottom: 0.14rem;
+        }
+        .evj-stat-value {
+            display: block;
+            font-size: 1rem;
+            font-weight: 800;
+            color: #0f172a;
+        }
+        .evj-desc {
+            margin-top: 0.55rem;
+            font-size: 0.86rem;
+            color: #334155;
+        }
+        .evj-empty {
+            padding: 1rem 1.1rem;
+            border-radius: 18px;
+            background: #f8fafc;
+            border: 1px dashed rgba(148, 163, 184, 0.4);
+            color: #475569;
+        }
+        @media (max-width: 1080px) {
+            .evj-row {
+                grid-template-columns: 1fr;
+                gap: 8px;
+            }
+        }
+        </style>
+        """,
+        unsafe_allow_html=True,
+    )
+
+
+def _render_recent_evaluator_job_card(job: Dict[str, Any], *, user_label: str = "(Auto)") -> None:
+    """Render one recent evaluator job as a single-row list item.
+
+    Args:
+        job: Summary dict for the job (the shape produced by
+            ``_summarize_recent_job``). Missing, ``None`` or empty fields
+            fall back to a placeholder instead of raising.
+        user_label: Text shown in the user column; ``"(Auto)"`` when empty.
+
+    All values interpolated into the markup are HTML-escaped here, except the
+    source reference, which is delegated to ``_format_source_ref_html`` and
+    therefore receives the raw (unescaped) label and URLs.
+    """
+
+    def _escaped(key: str, fallback: str = "") -> str:
+        # None and empty values both collapse to the fallback, so html.escape
+        # always receives a string.
+        return html.escape(str(job.get(key) or fallback))
+
+    # One read of the variant drives the CSS class and the status mark alike.
+    status_variant = str(job.get("status_variant") or "unknown")
+    variant = html.escape(status_variant)
+    status = html.escape(_status_display_label(job.get("status", "unknown") or "unknown"))
+    title_text = _escaped("title", "—")
+    catalog = _escaped("catalog", "—")
+    catalog_url = _escaped("catalog_url")
+    scheduled = html.escape(_format_jst_time_compact(job.get("scheduled_at")))
+    duration = _escaped("duration", "—")
+    job_id = _escaped("job_id")
+    build_status = _escaped("build_status", "—")
+    test_status = _escaped("test_status", "—")
+    created_label = _escaped("created_label", "—")
+    git_sha = str(job.get("git_sha", "") or "").strip()
+    source_label = str(job.get("source_label", "") or "—").strip()
+    user_text = html.escape(user_label or "(Auto)")
+    report_url = _escaped("report_url")
+    source_url = str(job.get("git_ref_url", "") or job.get("source_url", "") or "").strip()
+    git_commit_url = str(job.get("git_commit_url", "") or "").strip()
+    status_mark = {
+        "running": '<span class="evj-status-mark evj-status-mark--running" aria-hidden="true"></span>',
+        "success": '<span class="evj-status-mark evj-status-mark--success" aria-hidden="true">✓</span>',
+        "failed": '<span class="evj-status-mark evj-status-mark--failed" aria-hidden="true">!</span>',
+        "canceled": '<span class="evj-status-mark evj-status-mark--canceled" aria-hidden="true">×</span>',
+    }.get(status_variant, '<span class="evj-status-mark evj-status-mark--unknown" aria-hidden="true">?</span>')
+    meta_line = job_id
+    total = int(job.get("total", 0) or 0)
+    success = int(job.get("success", 0) or 0)
+    failed = int(job.get("failed", 0) or 0)
+    canceled = int(job.get("canceled", 0) or 0)
+    # A running job with no case counts yet shows a placeholder, not 0/0/0.
+    if status_variant == "running" and total == 0 and success == 0 and failed == 0 and canceled == 0:
+        counts = "Running..."
+    else:
+        counts = (
+            f'✅ <strong>{success}</strong> · '
+            f'❌ <strong>{failed}</strong> · '
+            f'⏹ <strong>{canceled}</strong> / '
+            f'<strong>{total}</strong>'
+        )
+    title_html = f'<a href="{report_url}" target="_blank" rel="noopener noreferrer">{title_text}</a>' if report_url else title_text
+    source_html = _format_source_ref_html(source_label, source_url, git_sha, git_commit_url)
+    catalog_html = (
+        f'<a href="{catalog_url}" target="_blank" rel="noopener noreferrer">{catalog}</a>'
+        if catalog_url else catalog
+    )
+    st.markdown(
+        f"""
+        <div class="evj-card evj-card--{variant}">
+          <div class="evj-row">
+            <div class="evj-name">
+              <div class="evj-title">{title_html}</div>
+              <div class="evj-name-sub">{meta_line}</div>
+            </div>
+            <div class="evj-cell evj-cell--nowrap">
+              <span class="evj-status evj-status--{variant}">{status_mark}{status}</span>
+            </div>
+            <div class="evj-cell">
+              <strong>{scheduled} ({created_label})</strong><br><span class="evj-name-sub">{duration}</span>
+            </div>
+            <div class="evj-cell evj-ref-cell">
+              <strong>{catalog_html}</strong><br><span class="evj-name-sub">{source_html}</span>
+            </div>
+            <div class="evj-cell evj-ref-cell">
+              <strong>{user_text}</strong>
+            </div>
+            <div class="evj-cell">
+              <span class="evj-name-sub">build {build_status} · test {test_status}</span><br>
+              <span class="evj-inline-stats">{counts}</span>
+            </div>
+          </div>
+        </div>
+        """,
+        unsafe_allow_html=True,
+    )
+
+
+def _render_recent_evaluator_job_detail(project_id: str, environment: str, job: Dict[str, Any]) -> None:
+    """Render the detail view for one evaluator job.
+
+    Shows a warning and returns when the job has no id, and an error when the
+    detail fetch raises. Otherwise renders, in order: the four case-count
+    metrics, two columns of overview fields, link buttons for report, catalog
+    and source, the fail message (if any), and expanders for suites, failed
+    cases and the raw JSON report.
+    """
+    job_id = str(job.get("job_id", "") or "")
+    if not job_id:
+        st.warning("Missing job id.")
+        return
+    try:
+        detail = _fetch_evaluator_job_detail(project_id, environment, job_id)
+    except Exception as e:
+        st.error(f"Could not fetch evaluator details: {e}")
+        return
+
+    st.markdown("**Overview**")
+    metric_fields = (
+        ("Total", "total"),
+        ("Success", "success"),
+        ("Failed", "failed"),
+        ("Canceled", "canceled"),
+    )
+    for column, (label, key) in zip(st.columns(4), metric_fields):
+        column.metric(label, int(detail.get(key, 0)))
+
+    overview_left, overview_right = st.columns([1.3, 1.1])
+    with overview_left:
+        ref_text = _format_source_ref_text(detail.get('target', ''), detail.get('git_sha', ''))
+        left_lines = (
+            f"Status: `{detail.get('status', 'unknown')}`",
+            f"Title: `{detail.get('title', '—')}`",
+            f"Build/Test: `{detail.get('build_status', '—')}` / `{detail.get('test_status', '—')}`",
+            f"Ref: `{ref_text}`",
+            f"Catalog: `{detail.get('catalog', '—')}`",
+            f"Repo: `{detail.get('source_repo_label', '—')}`",
+        )
+        for line in left_lines:
+            st.write(line)
+    with overview_right:
+        for label, key in (("Scheduled", "scheduled_at"), ("Started", "started_at"), ("Finished", "finished_at")):
+            st.write(f"{label}: `{_format_jst_time(detail.get(key), include_seconds=True)}`")
+        st.write(f"Duration: `{detail.get('duration', '—')}`")
+
+    action_cols = st.columns([1.2, 1.2, 4])
+    link_targets = (
+        ("Open report", detail.get("report_url", "")),
+        ("Open catalog", detail.get("catalog_url", "")),
+        ("Open source", detail.get("source_url", "") or detail.get("git_ref_url", "")),
+    )
+    for column, (label, url) in zip(action_cols, link_targets):
+        with column:
+            if url:
+                st.link_button(label, url, use_container_width=True)
+
+    fail_message = detail.get("fail_message")
+    if fail_message:
+        st.warning(fail_message)
+
+    table_sections = (
+        ("Suites", detail.get("suite_rows") or [], "No suite summary available."),
+        ("Failed Cases", detail.get("failed_case_rows") or [], "No failed cases in the current report."),
+    )
+    for heading, rows, empty_note in table_sections:
+        # Each expander opens by default only when it has rows to show.
+        with st.expander(f"{heading} ({len(rows)})", expanded=bool(rows)):
+            if rows:
+                st.dataframe(pd.DataFrame(rows), width="stretch", hide_index=True)
+            else:
+                st.caption(empty_note)
+
+    with st.expander("Raw JSON", expanded=False):
+        st.json(detail.get("raw_report", {}))
+
+
+def _render_recent_evaluator_job_run_dialog(
+    project_id: str,
+    environment: str,
+    job: Dict[str, Any],
+    *,
+    output_path_default: str,
+    download_type_default: str,
+    phase_default: str,
+    skip_large_file_default: bool,
+    large_file_mb_default: float,
+    keep_zip_files_default: bool,
+) -> None:
+    """Render the dialog used to enqueue Download + Eval + Parquet from a recent job row."""
+    job_id = str(job.get("job_id", "") or "")
+    if not job_id:
+        st.error("Missing evaluator job id.")
+        return
+
+    detail = _fetch_evaluator_job_detail(project_id, environment, job_id)
+    suite_options = _extract_suite_selection_options(detail.get("suite_rows") or [])
+    suite_label_to_id = {opt["label"]: opt["id"] for opt in suite_options}
+    suite_labels = [opt["label"] for opt in suite_options]
+
+    st.caption("Confirm the workflow options for this evaluator job, then start a background task.")
+    summary_cols = st.columns([1.45, 1.15, 1.35, 1.05])
+    summary_cols[0].markdown(f"**Title**  \n`{detail.get('title', '—')}`")
+    summary_cols[1].markdown(f"**Status**  \n`{detail.get('status', 'unknown')}`")
+    summary_cols[2].markdown(f"**Catalog**  \n`{detail.get('catalog', '—')}`")
+    summary_cols[3].markdown(f"**Cases**  \n`{int(detail.get('total', 0))}`")
+
+    with st.form(key=f"recent_eval_run_form_{job_id}", border=False):
+        run_output_path = st.text_input(
+            "Output path",
+            value=output_path_default,
+            help="Folder under the data directory. This uses the same safe path rules as the main download workflow.",
+        )
+
+        if not suite_labels:
+            hint_cols = st.columns([1.2, 2.8])
+            with hint_cols[0]:
+                if st.form_submit_button("Refresh suites", use_container_width=True):
+                    _fetch_evaluator_job_detail.clear()
+                    st.rerun()
+            with hint_cols[1]:
+                st.caption("No suite candidates were available yet for this job. Refresh to re-read suite data from the evaluator API.")
+
+        selected_suite_labels = st.multiselect(
+            "Suites to download (optional)",
+            options=suite_labels,
+            default=[],
+            help="Leave empty to download all suites from this evaluator job.",
+            disabled=not suite_labels,
+        )
+
+        run_download_type = st.radio(
+            "Download type",
+            ["Archives (ZIP)", "Result JSON only"],
+            index=0 if download_type_default == "Archives (ZIP)" else 1,
+            horizontal=True,
+        )
+
+        run_phase = ""
+        run_skip_large_file = False
+        run_large_file_mb = 50.0
+        run_keep_zip_files = False
+        if run_download_type == "Archives (ZIP)":
+            run_phase = st.text_input(
+                "Phase to extract",
+                value=phase_default,
+                help="Enter the phase name to extract from archives.",
+            )
+            opt_cols = st.columns([1.2, 1.3, 1.2])
+            with opt_cols[0]:
+                run_skip_large_file = st.checkbox(
+                    "Skip large files",
+                    value=skip_large_file_default,
+                    help="Skip unusually large archives during download.",
+                )
+            with opt_cols[1]:
+                run_large_file_mb = st.number_input(
+                    "Skip threshold (MB)",
+                    min_value=1.0,
+                    max_value=5000.0,
+                    step=1.0,
+                    value=float(large_file_mb_default),
+                )
+            with opt_cols[2]:
+                run_keep_zip_files = st.checkbox(
+                    "Keep ZIP files",
+                    value=keep_zip_files_default,
+                    help="Keep downloaded ZIPs after extraction.",
+                )
+
+        run_cols = st.columns([1.25, 1.25, 1.1])
+        with run_cols[0]:
+            run_eval = st.checkbox(
+                "Run evaluation",
+                value=True,
+                help="Run eval_result and generate Summary.csv / Score.csv after download.",
+            )
+        with run_cols[1]:
+            generate_parquet = st.checkbox(
+                "Generate parquet",
+                value=CATALOG_IO_AVAILABLE,
+                disabled=not CATALOG_IO_AVAILABLE,
+                help="Build scene_result.parquet from .pkl files." if CATALOG_IO_AVAILABLE else "Install perception_catalog_analyzer to enable this.",
+            )
+        with run_cols[2]:
+            eval_recursive = st.checkbox(
+                "Recursive eval",
+                value=True,
+                help="Search subdirectories for evaluation result folders.",
+            )
+
+        action_cols = st.columns([1.15, 1.15, 3.7])
+        cancel_clicked = action_cols[0].form_submit_button("Cancel", use_container_width=True)
+        start_clicked = action_cols[1].form_submit_button("Start", type="primary", use_container_width=True)
+
+    if cancel_clicked:
+        st.session_state.pop("recent_eval_jobs_run_selected", None)
+        st.rerun()
+
+    if not start_clicked:
+        return
+
+    resolved_output, path_err = resolve_under_data_root(run_output_path, allow_create=True)
+    if path_err:
+        st.error(f"Output path is invalid: {path_err}")
+        return
+
+    selected_suite_ids = [suite_label_to_id[label] for label in selected_suite_labels]
+    resolved_path_str = str(resolved_output)
+    set_config_value("output_path", to_data_relative(resolved_output))
+    set_config_value("environment", environment)
+    set_config_value("project_id", project_id)
+    set_config_value("job_id", job_id)
+    set_config_value("suite_id", "")
+    set_config_value("suite_ids", selected_suite_ids)
+    set_config_value("download_type", run_download_type)
+    if run_download_type == "Archives (ZIP)":
+        set_config_value("phase", run_phase)
+        set_config_value("skip_large_file", run_skip_large_file)
+        set_config_value("large_file_mb", run_large_file_mb)
+        set_config_value("keep_zip_files", run_keep_zip_files)
+
+    params = {
+        "output_path": resolved_path_str,
+        "project_id": project_id,
+        "job_id": job_id,
+        "suite_id": "",
+        "suite_ids": selected_suite_ids or None,
+        "download_type": "archives" if run_download_type == "Archives (ZIP)" else "result_json",
+        "phase": run_phase if run_download_type == "Archives (ZIP)" else "",
+        "skip_large_file": run_skip_large_file if run_download_type == "Archives (ZIP)" else False,
+        "large_file_mb": run_large_file_mb if run_download_type == "Archives (ZIP)" else 50.0,
+        "keep_zip_files": run_keep_zip_files if run_download_type == "Archives (ZIP)" else False,
+        "run_eval": run_eval,
+        "generate_parquet": generate_parquet,
+        "eval_recursive": eval_recursive,
+        "eval_overwrite": False,
+        "eval_workers": _default_eval_workers(),
+    }
+    task_id = _enqueue_task("download_and_eval", params)
+    if not task_id:
+        st.error("Failed to enqueue task. Check REDIS_URL and DATABASE_URL.")
+        return
+
+    st.session_state["recent_eval_jobs_flash"] = (
+        f"Queued Download + Eval + Parquet for `{detail.get('title', job_id)}`. "
+        f"Task id: `{task_id}`."
+    )
+    st.session_state.pop("recent_eval_jobs_run_selected", None)
+    st.rerun()
+
+
+def _render_recent_evaluator_job_retest_dialog(
+    project_id: str,
+    environment: str,
+    job: Dict[str, Any],
+    *,
+    output_path_default: str,
+    phase_default: str,
+) -> None:
+    """Render a compact workflow launcher that reuses build artifacts from a prior evaluator job.
+
+    On "Retest" this enqueues a ``run_evaluator_and_process`` task and stores the chosen values via
+    ``set_config_value``. The editable Catalog ID field takes precedence over the selected preset.
+    """
+    job_id = str(job.get("job_id", "") or "")
+    if not job_id:
+        st.error("Missing evaluator job id.")
+        return
+
+    detail = _fetch_evaluator_job_detail(project_id, environment, job_id)
+    raw_report = detail.get("raw_report") or {}
+    raw_catalog = raw_report.get("catalog") or {}
+    resolved_source_job_id = _resolve_retest_source_job_id(
+        project_id, environment, job_id, detail=detail
+    )
+    suite_options = _extract_suite_selection_options(detail.get("suite_rows") or [])
+    suite_label_to_id = {opt["label"]: opt["id"] for opt in suite_options}
+    suite_labels = [opt["label"] for opt in suite_options]
+    preset_entries = _load_catalog_presets()
+    preset_names = [str(entry.get("display_name") or "").strip() for entry in preset_entries if str(entry.get("display_name") or "").strip()]
+    preset_by_name = {str(entry.get("display_name") or "").strip(): entry for entry in preset_entries}
+
+    original_catalog_name = str(raw_catalog.get("display_name") or detail.get("catalog") or "").strip()
+    original_catalog_id = str(raw_catalog.get("id") or "").strip()
+    default_preset_name = original_catalog_name if original_catalog_name in preset_by_name else ""
+
+    import re
+
+    default_output_path = output_path_default
+    if not default_output_path:
+        clean_target = re.sub(r"[^\w]+", "_", str(detail.get("target") or job_id).strip()).strip("_") or "artifact"
+        default_output_path = f"retest_{clean_target}_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
+
+    st.caption("Schedule a new evaluator workflow that reuses build artifacts from this job, then download and process the new results.")
+    summary_cols = st.columns([1.35, 1.0, 1.25, 1.2])
+    summary_cols[0].markdown(f"**Source job**  \n`{job_id}`")
+    summary_cols[1].markdown(f"**Ref**  \n`{detail.get('target', '—')}`")
+    summary_cols[2].markdown(f"**Original catalog**  \n`{original_catalog_name or '—'}`")
+    summary_cols[3].markdown(f"**Suites found**  \n`{len(suite_labels)}`")
+    if resolved_source_job_id and resolved_source_job_id != job_id:
+        st.caption(f"Using upstream source job `{resolved_source_job_id}` for scheduling because this job is already a retest.")
+
+    preset_key = f"recent_eval_retest_catalog_preset_{job_id}"
+    last_preset_key = f"recent_eval_retest_last_catalog_preset_{job_id}"
+    catalog_id_key = f"recent_eval_retest_catalog_id_{job_id}"
+    suite_selection_key = _retest_suite_selection_key(job_id)
+    if preset_key not in st.session_state:
+        st.session_state[preset_key] = default_preset_name
+    if last_preset_key not in st.session_state:
+        st.session_state[last_preset_key] = ""
+    if catalog_id_key not in st.session_state:
+        st.session_state[catalog_id_key] = original_catalog_id
+    if suite_selection_key not in st.session_state:
+        st.session_state[suite_selection_key] = []
+
+    selected_preset_name = st.selectbox(
+        "Catalog preset",
+        options=[""] + preset_names,
+        # No ``index=``: the selection is seeded via st.session_state[preset_key]; passing both triggers a Streamlit warning.
+        key=preset_key,
+        help="Choose a preset catalog, or leave this empty and enter a catalog id manually.",
+        format_func=lambda value: value or "Custom / manual",
+    )
+    selected_preset = preset_by_name.get(selected_preset_name or "", {})
+    preset_catalog_id = str(selected_preset.get("catalog_id") or "").strip()
+    if st.session_state[last_preset_key] != selected_preset_name:
+        # Preset changed since the last run: re-seed the editable Catalog ID field once.
+        st.session_state[catalog_id_key] = preset_catalog_id if selected_preset_name else original_catalog_id
+        st.session_state[last_preset_key] = selected_preset_name
+    catalog_id = st.text_input(
+        "Catalog ID",
+        value="",
+        key=catalog_id_key,
+        help="You can switch to a different catalog while still reusing the build artifacts from the source job.",
+    ).strip()
+
+    selected_suite_labels = st.multiselect(
+        "Suites to run",
+        options=suite_labels,
+        key=suite_selection_key,
+        help="Defaults to empty. Leave it empty to let the evaluator use its default suite selection, or choose specific suites to rerun.",
+        disabled=not suite_labels,
+    )
+    description = st.text_input(
+        "Description",
+        value="",
+        help="Leave empty to use an automatic evaluator artifact-retest name.",
+    ).strip()
+    retest_output_path = st.text_input(
+        "Output path",
+        value=default_output_path,
+        help="Folder under the data directory for the downloaded retest results.",
+    )
+    run_download_type = st.radio(
+        "Download type",
+        ["Archives (ZIP)", "Result JSON only"],
+        index=0,
+        horizontal=True,
+    )
+    run_phase = ""
+    if run_download_type == "Archives (ZIP)":
+        run_phase = st.text_input(
+            "Phase to extract",
+            value=phase_default,
+            help="Enter the phase name to extract from archives.",
+        )
+
+    run_cols = st.columns([1.2, 1.2, 1.0])
+    with run_cols[0]:
+        run_eval = st.checkbox(
+            "Run evaluation",
+            value=True,
+            help="Run eval_result and generate Summary.csv / Score.csv after download.",
+        )
+    with run_cols[1]:
+        generate_parquet = st.checkbox(
+            "Generate parquet",
+            value=CATALOG_IO_AVAILABLE,
+            disabled=not CATALOG_IO_AVAILABLE,
+            help="Build scene_result.parquet from .pkl files." if CATALOG_IO_AVAILABLE else "Install perception_catalog_analyzer to enable this.",
+        )
+    with run_cols[2]:
+        eval_recursive = st.checkbox(
+            "Recursive eval",
+            value=True,
+            help="Search subdirectories for evaluation result folders.",
+        )
+
+    action_cols = st.columns([1.15, 1.15, 3.7])
+    cancel_clicked = action_cols[0].button("Cancel", key=f"recent_eval_retest_cancel_{job_id}", use_container_width=True)
+    start_clicked = action_cols[1].button("Retest", key=f"recent_eval_retest_start_{job_id}", type="primary", use_container_width=True)
+
+    if cancel_clicked:
+        st.session_state.pop(suite_selection_key, None)
+        st.session_state.pop("recent_eval_jobs_retest_selected", None)
+        st.rerun()
+
+    if not start_clicked:
+        return
+
+    final_catalog_id = catalog_id or preset_catalog_id  # the visible field wins so manual edits are not silently dropped
+    if not final_catalog_id:
+        st.error("Catalog ID is required.")
+        return
+
+    resolved_output, path_err = resolve_under_data_root(retest_output_path, allow_create=True)
+    if path_err:
+        st.error(f"Output path is invalid: {path_err}")
+        return
+
+    selected_suite_ids = [suite_label_to_id[label] for label in selected_suite_labels]
+    resolved_path_str = str(resolved_output)
+    uses_preset = bool(selected_preset_name) and final_catalog_id == preset_catalog_id  # False once the id was edited by hand
+    final_description = description or _make_retest_description(
+        str(detail.get("target") or job_id),
+        selected_preset_name if uses_preset else "",
+        has_custom_catalog=not uses_preset,
+    )
+
+    task_id = _enqueue_task(
+        "run_evaluator_and_process",
+        {
+            "project_id": project_id,
+            "catalog_id": final_catalog_id,
+            "integration_id": "",
+            "source_job_id": resolved_source_job_id or job_id,
+            "suite_ids": selected_suite_ids or None,
+            "target_name": "",
+            "description": final_description,
+            "output_path": resolved_path_str,
+            "environment": environment,
+            "max_retries": 0,
+            "clean_build": False,
+            "debug": False,
+            "is_tag": False,
+            "download_type": "archives" if run_download_type == "Archives (ZIP)" else "result_json",
+            "phase": run_phase,
+            "skip_large_file": False,
+            "large_file_mb": 50.0,
+            "keep_zip_files": False,
+            "poll_interval": 60,
+            "max_wait_seconds": 6 * 3600,
+            "run_eval": run_eval,
+            "generate_parquet": generate_parquet,
+            "eval_recursive": eval_recursive,
+            "eval_overwrite": False,
+            "eval_workers": _default_eval_workers(),
+        },
+    )
+    if not task_id:
+        st.error("Failed to enqueue task. Check REDIS_URL and DATABASE_URL.")
+        return
+
+    set_config_value("output_path", to_data_relative(resolved_output))
+    set_config_value("environment", environment)
+    set_config_value("project_id", project_id)
+    set_config_value("catalog_id", final_catalog_id)
+    set_config_value("suite_ids", selected_suite_ids)
+
+    st.session_state["recent_eval_jobs_flash"] = (
+        f"Queued artifact retest for `{detail.get('title', job_id)}`. "
+        f"Task id: `{task_id}`."
+    )
+    st.session_state.pop(suite_selection_key, None)
+    st.session_state.pop("recent_eval_jobs_retest_selected", None)
+    st.rerun()
+
+
+def _render_recent_evaluator_jobs_section(
+    project_id: str,
+    environment: str,
+    *,
+    output_path_default: str,
+    download_type_default: str,
+    phase_default: str,
+    skip_large_file_default: bool,
+    large_file_mb_default: float,
+    keep_zip_files_default: bool,
+    show_toggle: bool = True,
+    default_visible: bool = False,
+    show_title: bool = True,
+) -> None:
+    """Render the recent evaluator-jobs browser: filter toolbar, paged job list, and per-job dialogs.
+
+    The ``*_default`` keyword arguments are forwarded to the per-job
+    "Download + Eval + Parquet" and "Artifact retest" launchers.
+
+    Args:
+        show_toggle: Show a toggle that gates the section; when False the section always renders.
+        default_visible: Initial toggle state when no value is stored in session state yet.
+        show_title: Render the subheader and caption above the toolbar.
+    """
+    _inject_recent_evaluator_jobs_styles()
+    if show_toggle:
+        show_section = st.toggle(
+            "Show recent evaluator jobs",
+            value=st.session_state.get("recent_eval_jobs_show", default_visible),
+            key="recent_eval_jobs_show",
+            help="Load recent evaluator jobs only when you want to browse them.",
+        )
+    else:
+        show_section = True
+        st.session_state["recent_eval_jobs_show"] = True
+    if not show_section:
+        return
+
+    if show_title:
+        st.subheader("Recent evaluator jobs")
+        st.caption("Compact browser for recent evaluator jobs. Select one job to inspect detailed suite and failed-case information.")
+    flash_message = st.session_state.pop("recent_eval_jobs_flash", None)
+    if flash_message:
+        st.success(flash_message)
+    user_directory = _get_recent_eval_user_directory()
+
+    control_cols = st.columns([0.75, 1.0, 1.15, 1.45, 1.25, 1.0, 1.0, 0.75])
+    with control_cols[0]:
+        st.markdown('<div class="evj-toolbar-note">Rows</div>', unsafe_allow_html=True)
+        limit = int(
+            st.selectbox(
+                "Rows",
+                options=[10, 20, 50, 100],
+                index=1,
+                key="recent_eval_jobs_limit",
+                help="How many recent evaluator jobs to fetch for this project.",
+                label_visibility="collapsed",
+            )
+        )
+    with control_cols[1]:
+        st.markdown('<div class="evj-toolbar-note">Status</div>', unsafe_allow_html=True)
+        status_filter = st.multiselect(
+            "Status",
+            options=["running", "success", "failed", "canceled", "unknown"],
+            default=[],
+            key="recent_eval_jobs_status_filter",
+            help="Leave empty to show all recent jobs.",
+            label_visibility="collapsed",
+            placeholder="All statuses",
+        )
+    with control_cols[2]:
+        st.markdown('<div class="evj-toolbar-note">Search In</div>', unsafe_allow_html=True)
+        search_scope = st.selectbox(
+            "Search in",
+            options=["Branch/tag", "Description", "Job ID", "Git SHA", "Fail message"],
+            index=1,
+            key="recent_eval_jobs_search_scope",
+            help="Choose which evaluator field the quick search should target.",
+            label_visibility="collapsed",
+        )
+    with control_cols[3]:
+        st.markdown('<div class="evj-toolbar-note">Search</div>', unsafe_allow_html=True)
+        search_text = st.text_input(
+            "Search",
+            value=st.session_state.get("recent_eval_jobs_search_text", ""),
+            key="recent_eval_jobs_search_text",
+            help="Server-side search across the selected field.",
+            label_visibility="collapsed",
+            placeholder="Type to search evaluator jobs",
+        ).strip()
+    selected_user_name = ""
+    user_candidates = sorted(
+        {
+            info.get("name", "").strip()
+            for info in user_directory.values()
+            if info.get("name", "").strip()
+        },
+        key=str.lower,
+    )
+    with control_cols[4]:
+        st.markdown('<div class="evj-toolbar-note">User</div>', unsafe_allow_html=True)
+        selected_user_name = st.selectbox(
+            "User",
+            options=[""] + user_candidates,
+            index=0,
+            key="recent_eval_jobs_user_filter",
+            help="Filter jobs by resolved scheduled user name.",
+            label_visibility="collapsed",
+        )
+    with control_cols[5]:
+        st.markdown('<div class="evj-toolbar-note">From</div>', unsafe_allow_html=True)
+        date_from = st.date_input(
+            "From",
+            value=st.session_state.get("recent_eval_jobs_date_from", None),
+            key="recent_eval_jobs_date_from",
+            label_visibility="collapsed",
+            help="Scheduled-at lower bound in JST.",
+        )
+    with control_cols[6]:
+        st.markdown('<div class="evj-toolbar-note">To</div>', unsafe_allow_html=True)
+        date_to = st.date_input(
+            "To",
+            value=st.session_state.get("recent_eval_jobs_date_to", None),
+            key="recent_eval_jobs_date_to",
+            label_visibility="collapsed",
+            help="Scheduled-at upper bound in JST.",
+        )
+    with control_cols[7]:
+        st.markdown('<div class="evj-toolbar-note">Actions</div>', unsafe_allow_html=True)
+        if st.button("Refresh", key="refresh_recent_eval_jobs", use_container_width=True):
+            _fetch_recent_evaluator_job_pages.clear()
+            _fetch_evaluator_job_detail.clear()
+            st.rerun()
+
+    page_key = "recent_eval_jobs_page"
+    if page_key not in st.session_state:
+        st.session_state[page_key] = 1
+    if date_from and date_to and date_from > date_to:
+        st.warning("`From` date must be earlier than or equal to `To` date.")
+        return
+
+    def _render_job_list() -> None:
+        nonlocal user_directory
+        if not project_id:
+            st.info("Enter a project id to browse recent evaluator jobs.")
+            return
+        current_page = max(1, int(st.session_state.get(page_key, 1)))
+        pages_to_fetch = max(3, current_page + 2)
+        if search_text or status_filter or date_from or date_to or selected_user_name:
+            pages_to_fetch = max(pages_to_fetch, 6)
+        server_status_values = tuple(_status_filter_values(status_filter))
+        server_search_filter, search_needle = _build_recent_job_search_filter(search_text, search_scope, user_directory)
+        selected_user_ids = sorted(
+            {
+                subject_id
+                for subject_id, info in user_directory.items()
+                if selected_user_name
+                and selected_user_name.lower() == str(info.get("name") or "").strip().lower()
+            }
+        )
+        server_date_filters = _build_recent_job_date_filters(date_from, date_to)
+        extra_filters: List[Dict[str, Any]] = []
+        if server_search_filter:
+            extra_filters.append(server_search_filter)
+        if selected_user_ids:
+            extra_filters.append(
+                {
+                    "field": "scheduled_by",
+                    "operator": "In",
+                    "values": selected_user_ids,
+                }
+            )
+        extra_filters.extend(server_date_filters)
+        extra_filter_tuples = tuple(
+            (
+                str(f["field"]),
+                str(f["operator"]),
+                tuple(f.get("values", []) or []),
+            )
+            for f in extra_filters
+        )
+        fetch_help = "Loading evaluator jobs..."
+        if search_text or status_filter or date_from or date_to or selected_user_name:
+            fetch_help = "Loading evaluator jobs with filters..."
+        try:
+            with st.spinner(fetch_help):
+                fetched_pages = _fetch_recent_evaluator_job_pages(
+                    project_id,
+                    environment,
+                    limit,
+                    pages_to_fetch,
+                    status_values=server_status_values,
+                    extra_filters=extra_filter_tuples,
+                )
+        except requests.Timeout:
+            st.error("Timed out while loading evaluator jobs. The evaluator server may be slow right now. Try Refresh.")
+            return
+        except Exception as e:  # also covers requests.RequestException, which was reported identically
+            st.error(_friendly_request_error_message(e))
+            return
+        if search_text:
+            _save_recent_job_search_history(search_scope, search_text)
+
+        jobs = [job for page in fetched_pages for job in page.get("jobs", [])]
+        user_directory = _hydrate_recent_eval_user_directory(jobs, environment)
+        has_more_from_api = bool(fetched_pages and fetched_pages[-1].get("next_token"))
+
+        if not fetched_pages:
+            st.warning("No response was returned from the evaluator server. Try Refresh.")
+            return
+
+        if search_needle:
+            if search_scope == "Branch/tag":
+                jobs = [job for job in jobs if search_needle in str(job.get("target", "")).lower()]
+            elif search_scope == "Description":
+                jobs = [job for job in jobs if search_needle in str(job.get("description", "")).lower() or search_needle in str(job.get("title", "")).lower()]
+            elif search_scope == "Job ID":
+                jobs = [job for job in jobs if search_needle in str(job.get("job_id", "")).lower()]
+            elif search_scope == "Git SHA":
+                jobs = [job for job in jobs if search_needle in str(job.get("git_sha", "")).lower()]
+            elif search_scope == "Fail message":
+                jobs = [job for job in jobs if search_needle in str(job.get("fail_message", "")).lower()]
+        if selected_user_name:
+            selected_lower = selected_user_name.lower()
+            jobs = [
+                job for job in jobs
+                if selected_lower == str((user_directory.get(str(job.get("scheduled_by") or "").strip(), {}) or {}).get("name", "")).strip().lower()
+            ]
+        if status_filter:
+            selected = {evaluator_api.normalize_job_status(v) for v in status_filter}
+            jobs = [job for job in jobs if job.get("status_variant") in selected or evaluator_api.normalize_job_status(job.get("status", "")) in selected]
+
+        if not jobs:
+            st.session_state[page_key] = 1
+            empty_message = "No recent evaluator jobs were returned."
+            if search_text or status_filter or date_from or date_to or selected_user_name:
+                empty_message = "No recent evaluator jobs matched the current filters."
+            st.markdown(f'<div class="evj-empty">{html.escape(empty_message)}</div>', unsafe_allow_html=True)
+            return
+
+        total_loaded = len(jobs)
+        has_next_page = total_loaded > current_page * limit or has_more_from_api
+        max_known_page = max(1, (total_loaded + limit - 1) // limit)
+        if current_page > max_known_page:
+            current_page = max_known_page
+            st.session_state[page_key] = current_page
+        start_idx = (current_page - 1) * limit
+        end_idx = start_idx + limit
+        visible_jobs = jobs[start_idx:end_idx]
+        # visible_jobs is never empty here: jobs is non-empty and current_page was clamped to max_known_page.
+
+        if current_page == 1:
+            page_numbers = list(range(1, min(3, max_known_page) + 1))
+        else:
+            page_numbers = list(
+                range(
+                    max(1, current_page - 1),
+                    min(max_known_page, current_page + 1) + 1,
+                )
+            )
+        pager_cols = st.columns([0.8, 0.9, 0.9, 0.9, 0.8, 5.7])
+        with pager_cols[0]:
+            if st.button("‹", key="recent_eval_jobs_prev", use_container_width=True, disabled=current_page <= 1):
+                st.session_state[page_key] = max(1, current_page - 1)
+                st.rerun()
+        for idx, page_num in enumerate(page_numbers[:3], start=1):
+            with pager_cols[idx]:
+                btn_key = (
+                    f"recent_eval_jobs_pagebtn_active_{page_num}"
+                    if page_num == current_page
+                    else f"recent_eval_jobs_pagebtn_{page_num}"
+                )
+                if st.button(
+                    str(page_num),
+                    key=btn_key,
+                    use_container_width=True,
+                    disabled=page_num == current_page,
+                ):
+                    st.session_state[page_key] = page_num
+                    st.rerun()
+        with pager_cols[4]:
+            if st.button("›", key="recent_eval_jobs_next", use_container_width=True, disabled=not has_next_page):
+                st.session_state[page_key] = current_page + 1
+                st.rerun()
+
+        selected_job_id = st.session_state.get("recent_eval_jobs_selected")
+        if selected_job_id and not any(str(job.get("job_id", "")) == str(selected_job_id) for job in jobs):
+            st.session_state.pop("recent_eval_jobs_selected", None)
+            selected_job_id = None
+
+        selected_run_job_id = st.session_state.get("recent_eval_jobs_run_selected")
+        if selected_run_job_id and not any(str(job.get("job_id", "")) == str(selected_run_job_id) for job in jobs):
+            st.session_state.pop("recent_eval_jobs_run_selected", None)
+            selected_run_job_id = None
+
+        selected_retest_job_id = st.session_state.get("recent_eval_jobs_retest_selected")
+        if selected_retest_job_id and not any(str(job.get("job_id", "")) == str(selected_retest_job_id) for job in jobs):
+            st.session_state.pop("recent_eval_jobs_retest_selected", None)
+            selected_retest_job_id = None
+
+        st.markdown('<div class="evj-list">', unsafe_allow_html=True)
+        for job in visible_jobs:
+            subject_id = str(job.get("scheduled_by") or "").strip()
+            user_info = user_directory.get(subject_id, {})
+            user_label = str(user_info.get("name") or subject_id or "(Auto)").strip()
+            row_cols = st.columns([9.2, 2.6])
+            with row_cols[0]:
+                _render_recent_evaluator_job_card(job, user_label=user_label)
+            with row_cols[1]:
+                action_cols = st.columns([1.0, 1.0, 1.0], gap="small")
+                with action_cols[0]:
+                    if st.button("Details", key=f"recent_eval_view_{job['job_id']}", use_container_width=True):
+                        st.session_state["recent_eval_jobs_selected"] = str(job["job_id"])
+                        _fetch_evaluator_job_detail.clear()
+                        st.rerun()
+                with action_cols[1]:
+                    if st.button("Start", key=f"recent_eval_run_{job['job_id']}", use_container_width=True):
+                        st.session_state["recent_eval_jobs_run_selected"] = str(job["job_id"])
+                        _fetch_evaluator_job_detail.clear()
+                        st.rerun()
+                with action_cols[2]:
+                    if st.button("Retest", key=f"recent_eval_retest_{job['job_id']}", use_container_width=True):
+                        st.session_state.pop(_retest_suite_selection_key(str(job["job_id"])), None)
+                        st.session_state["recent_eval_jobs_retest_selected"] = str(job["job_id"])
+                        _fetch_evaluator_job_detail.clear()
+                        st.rerun()
+        st.markdown("</div>", unsafe_allow_html=True)
+
+        selected_job_id = st.session_state.get("recent_eval_jobs_selected")
+        if selected_job_id:
+            selected_job = next((job for job in jobs if str(job.get("job_id", "")) == str(selected_job_id)), None)
+            if selected_job:
+                if callable(getattr(st, "dialog", None)):
+                    try:
+                        @st.dialog(f"Job details · {selected_job.get('title', '—')}", width="large")
+                        def _recent_eval_job_dialog() -> None:
+                            _render_recent_evaluator_job_detail(project_id, environment, selected_job)
+                            if st.button("Close", key="recent_eval_jobs_close_detail", use_container_width=True):
+                                st.session_state.pop("recent_eval_jobs_selected", None)
+                                st.rerun()
+
+                        _recent_eval_job_dialog()
+                    finally:
+                        st.session_state.pop("recent_eval_jobs_selected", None)
+                else:
+                    st.markdown('<div class="evj-detail">', unsafe_allow_html=True)
+                    hdr_cols = st.columns([4.4, 1.1])
+                    with hdr_cols[0]:
+                        st.subheader(f"Job details · {selected_job.get('title', '—')}")
+                    with hdr_cols[1]:
+                        if st.button("Close", key="recent_eval_jobs_close_detail_fallback", use_container_width=True):
+                            st.session_state.pop("recent_eval_jobs_selected", None)
+                            st.rerun()
+                    _render_recent_evaluator_job_detail(project_id, environment, selected_job)
+                    st.markdown("</div>", unsafe_allow_html=True)
+
+        selected_run_job_id = st.session_state.get("recent_eval_jobs_run_selected")
+        if selected_run_job_id:
+            selected_run_job = next((job for job in jobs if str(job.get("job_id", "")) == str(selected_run_job_id)), None)
+            if selected_run_job:
+                if callable(getattr(st, "dialog", None)):
+                    try:
+                        @st.dialog(f"Download + Eval + Parquet · {selected_run_job.get('title', '—')}", width="large")
+                        def _recent_eval_run_dialog() -> None:
+                            _render_recent_evaluator_job_run_dialog(
+                                project_id,
+                                environment,
+                                selected_run_job,
+                                output_path_default=output_path_default,
+                                download_type_default=download_type_default,
+                                phase_default=phase_default,
+                                skip_large_file_default=skip_large_file_default,
+                                large_file_mb_default=large_file_mb_default,
+                                keep_zip_files_default=keep_zip_files_default,
+                            )
+
+                        _recent_eval_run_dialog()
+                    finally:
+                        if st.session_state.get("recent_eval_jobs_run_selected") == str(selected_run_job_id):
+                            st.session_state.pop("recent_eval_jobs_run_selected", None)
+                else:
+                    st.markdown('<div class="evj-detail">', unsafe_allow_html=True)
+                    hdr_cols = st.columns([4.4, 1.1])
+                    with hdr_cols[0]:
+                        st.subheader(f"Download + Eval + Parquet · {selected_run_job.get('title', '—')}")
+                    with hdr_cols[1]:
+                        if st.button("Close", key="recent_eval_jobs_close_run_fallback", use_container_width=True):
+                            st.session_state.pop("recent_eval_jobs_run_selected", None)
+                            st.rerun()
+                    _render_recent_evaluator_job_run_dialog(
+                        project_id,
+                        environment,
+                        selected_run_job,
+                        output_path_default=output_path_default,
+                        download_type_default=download_type_default,
+                        phase_default=phase_default,
+                        skip_large_file_default=skip_large_file_default,
+                        large_file_mb_default=large_file_mb_default,
+                        keep_zip_files_default=keep_zip_files_default,
+                    )
+                    st.markdown("</div>", unsafe_allow_html=True)
+
+        selected_retest_job_id = st.session_state.get("recent_eval_jobs_retest_selected")
+        if selected_retest_job_id:
+            selected_retest_job = next((job for job in jobs if str(job.get("job_id", "")) == str(selected_retest_job_id)), None)
+            if selected_retest_job:
+                if callable(getattr(st, "dialog", None)):
+                    try:
+                        @st.dialog(f"Artifact retest · {selected_retest_job.get('title', '—')}", width="large")
+                        def _recent_eval_retest_dialog() -> None:
+                            _render_recent_evaluator_job_retest_dialog(
+                                project_id,
+                                environment,
+                                selected_retest_job,
+                                output_path_default=output_path_default,
+                                phase_default=phase_default,
+                            )
+
+                        _recent_eval_retest_dialog()
+                    finally:
+                        if st.session_state.get("recent_eval_jobs_retest_selected") == str(selected_retest_job_id):
+                            st.session_state.pop("recent_eval_jobs_retest_selected", None)
+                else:
+                    st.markdown('<div class="evj-detail">', unsafe_allow_html=True)
+                    hdr_cols = st.columns([4.4, 1.1])
+                    with hdr_cols[0]:
+                        st.subheader(f"Artifact retest · {selected_retest_job.get('title', '—')}")
+                    with hdr_cols[1]:
+                        if st.button("Close", key="recent_eval_jobs_close_retest_fallback", use_container_width=True):
+                            st.session_state.pop("recent_eval_jobs_retest_selected", None)
+                            st.rerun()
+                    _render_recent_evaluator_job_retest_dialog(
+                        project_id,
+                        environment,
+                        selected_retest_job,
+                        output_path_default=output_path_default,
+                        phase_default=phase_default,
+                    )
+                    st.markdown("</div>", unsafe_allow_html=True)
+
+    _render_job_list()
diff --git a/evaluation_dashboard_app/lib/ui/styles_global.py b/evaluation_dashboard_app/lib/ui/styles_global.py
index f4118e8..a66be10 100644
--- a/evaluation_dashboard_app/lib/ui/styles_global.py
+++ b/evaluation_dashboard_app/lib/ui/styles_global.py
@@ -49,21 +49,21 @@ def inject_app_page_styles() -> None:
         """,
         unsafe_allow_html=True,
     )
-    try:
-        from lib.deploy_debug import running_in_docker
+    # try:
+    #     from lib.deploy_debug import running_in_docker
 
-        if not running_in_docker():
-            st.markdown(
-                """
-                <style>
-                /* 99_Deployment_Debug.py is registered for st.page_link in Docker; hide default nav outside containers. */
-                section[data-testid="stSidebar"] a[href*="Deployment_Debug"],
-                section[data-testid="stSidebar"] a[href*="deployment_debug"] {
-                    display: none !important;
-                }
-                </style>
-                """,
-                unsafe_allow_html=True,
-            )
-    except Exception:
-        pass
+    #     if not running_in_docker():
+    #         st.markdown(
+    #             """
+    #             <style>
+    #             /* 99_Deployment_Debug.py is registered for st.page_link in Docker; hide default nav outside containers. */
+    #             section[data-testid="stSidebar"] a[href*="Deployment_Debug"],
+    #             section[data-testid="stSidebar"] a[href*="deployment_debug"] {
+    #                 display: none !important;
+    #             }
+    #             </style>
+    #             """,
+    #             unsafe_allow_html=True,
+    #         )
+    # except Exception:
+    #     pass
diff --git a/evaluation_dashboard_app/lib/ui/task_history.py b/evaluation_dashboard_app/lib/ui/task_history.py
new file mode 100644
index 0000000..e5d05f5
--- /dev/null
+++ b/evaluation_dashboard_app/lib/ui/task_history.py
@@ -0,0 +1,280 @@
+"""Shared task history/list rendering used across pages."""
+
+from __future__ import annotations
+
+import json
+from datetime import datetime, timedelta, timezone
+from typing import Any, Dict, List, Optional
+
+import streamlit as st
+
+from lib.auth import get_current_user_id, is_auth_enabled
+from lib.db import delete_task, get_task
+from lib.ui.download_ui import TaskCardMode, render_task_list_empty_state, task_list_card_markup
+from lib.ui.task_result_summary import render_task_result_summary
+
+_JST = timezone(timedelta(hours=9))
+
+
+def _to_jst(dt: Any) -> Optional[datetime]:
+    """Return *dt* expressed in JST, or None when it cannot be converted.
+
+    A value without ``tzinfo`` is treated as UTC; anything lacking ``astimezone`` yields None.
+    """
+    if dt is None or not hasattr(dt, "astimezone"):
+        return None
+    try:
+        aware = dt if getattr(dt, "tzinfo", None) is not None else dt.replace(tzinfo=timezone.utc)
+        return aware.astimezone(_JST)
+    except Exception:
+        return None
+
+
+def _task_type_label(task_type: str) -> str:
+    """Map an internal task type to its display label; unknown types pass through, empty becomes "Task"."""
+    return {
+        "download_results": "Download results",
+        "download_scenarios": "Download scenarios",
+        "run_eval_dirs": "Run eval dirs",
+        "generate_summary_csv": "Generate summary CSV",
+        "build_parquet": "Build parquet",
+        "download_and_eval": "Download + Eval",
+        "run_evaluator_and_process": "Run Evaluator + Process",
+        "run_release_specsheet_workflow": "Release Specsheet",
+    }.get(task_type, task_type or "Task")
+
+
+def _task_summary(t: Dict[str, Any]) -> str:
+    """Build the one-line parameter summary shown on a task card.
+
+    Reads ``t["type"]`` and ``t["parameters"]``: download-style tasks show the job id and
+    destination, evaluator workflows show the tag/branch target and output path.
+    Returns an empty string for task types without a dedicated format.
+    """
+    params = t.get("parameters") or {}
+    kind = t.get("type", "")
+    if kind in ("download_results", "download_and_eval"):
+        dest = params.get("output_path") or params.get("job_id") or ""
+        if kind == "download_results":
+            return f"job_id={params.get('job_id', '')} → {dest}"
+        # "download" is always listed; eval/parquet are appended only when their flags are set.
+        stages = ["download"]
+        stages += ["eval"] if params.get("run_eval") else []
+        stages += ["parquet"] if params.get("generate_parquet") else []
+        return f"job_id={params.get('job_id', '')} [{'+'.join(stages)}] → {dest}"
+    if kind == "download_scenarios":
+        dest = params.get("output_dir") or params.get("output_path") or ""
+        return f"job_id={params.get('job_id', '')} → {dest}"
+    if kind in ("run_eval_dirs", "generate_summary_csv"):
+        return params.get("eval_root", "")
+    if kind == "build_parquet":
+        return params.get("pkl_dir", "")
+    if kind in ("run_evaluator_and_process", "run_release_specsheet_workflow"):
+        # Both evaluator-driven workflows share the same summary format.
+        ref_kind = "tag" if params.get("is_tag", False) else "branch"
+        return f"{ref_kind}={params.get('target_name', '')} → {params.get('output_path', '')}"
+    return ""
+
+
+def _task_time_str(t: Dict[str, Any]) -> str:
+    """Format ``created_at`` in JST like "Jan 05, 14:30"; "—" when it is missing or unconvertible."""
+    raw = t.get("created_at")
+    if not (raw and (local := _to_jst(raw))):
+        return "—"
+    try:
+        return local.strftime("%b %d, %H:%M")
+    except Exception:
+        return str(raw)[:16]
+
+
+def _task_duration(t: Dict[str, Any]) -> Optional[str]:
+    """Return the created→updated elapsed time as "42s", "3m 7s" or "2h 15m" (None if unavailable).
+
+    A negative gap (``updated_at`` earlier than ``created_at``) is clamped to "0s".
+    """
+    created, updated = t.get("created_at"), t.get("updated_at")
+    if not (created and updated and hasattr(created, "timestamp") and hasattr(updated, "timestamp")):
+        return None
+    try:
+        secs = max(0, int(updated.timestamp() - created.timestamp()))
+    except Exception:  # best-effort display helper: a bad timestamp must not break the row
+        return None
+    minutes, seconds = divmod(secs, 60)
+    if minutes == 0:
+        return f"{seconds}s"
+    if minutes < 60:
+        return f"{minutes}m {seconds}s"
+    return f"{minutes // 60}h {minutes % 60}m"
+
+
+def render_task_detail_content(t: Dict[str, Any]) -> None:
+    """Render the task detail view; on any failure show the error message and its traceback instead."""
+    import traceback
+    try:
+        _render_task_detail_content_impl(t)
+    except Exception as exc:
+        st.error(f"Could not load task details: {exc}")
+        st.code(traceback.format_exc(), language=None)
+
+
+def _render_task_detail_content_impl(t: Dict[str, Any]) -> None:
+    """Render timestamps, result summary, result path, error, progress, log and parameters (empty ones skipped)."""
+    created_jst = _to_jst(t.get("created_at"))
+    updated_jst = _to_jst(t.get("updated_at"))
+    stamps = []
+    for label, field, moment, wanted in (
+        ("Created", "created_at", created_jst, bool(created_jst)),
+        ("Updated", "updated_at", updated_jst, bool(updated_jst and updated_jst != created_jst)),
+    ):
+        if not wanted:
+            continue
+        try:
+            stamps.append(f"{label}: {moment.strftime('%Y-%m-%d %H:%M:%S')} JST")
+        except Exception:
+            stamps.append(f"{label}: {t.get(field)}")
+    if stamps:
+        st.caption(" · ".join(stamps))
+
+    raw_summary = t.get("result_summary")
+    if raw_summary:
+        try:
+            parsed = raw_summary if not isinstance(raw_summary, str) else json.loads(raw_summary)
+            render_task_result_summary(parsed)
+            st.markdown("---")
+        except (TypeError, ValueError):
+            pass
+    result_path = t.get("result_path")
+    if result_path:
+        st.text_input(
+            "Result path",
+            value=result_path,
+            key=f"rp_modal_{t.get('id')!s}",
+            disabled=True,
+            label_visibility="collapsed",
+        )
+    failure = t.get("error_message")
+    if t.get("status", "") == "failed" and failure:
+        st.error(failure)
+    if progress_text := (t.get("progress_message") or "").strip():
+        st.info(progress_text)
+    if log_text := (t.get("log_output") or "").strip():
+        st.caption("Log output")
+        st.code(log_text, language=None)
+    task_params = t.get("parameters") or {}
+    if task_params:
+        st.caption("Parameters")
+        st.json(task_params)
+
+
+def _open_task_detail(task_id: str) -> None:  # on_click callback of the per-row "View" button
+    st.session_state["_task_detail_id"] = str(task_id)  # read and cleared again by render_task_list
+
+
+def _render_one_task_row(
+    t: Dict[str, Any],
+    current_user: Optional[str],
+    use_dialog: bool,
+    *,
+    mode: TaskCardMode,
+) -> None:
+    """Render one task card followed by its action buttons.
+
+    Args:
+        t: Task record; reads ``id``, ``status``, ``type``, ``progress_pct`` and
+            ``progress_message`` plus the fields used by the summary/time helpers.
+        current_user: Passed to ``delete_task`` as ``session_id``.
+        use_dialog: When True a "View" button stores the task id for the detail dialog;
+            otherwise the details are rendered inline inside a "More" expander.
+        mode: Card layout; the parameter summary is only shown for ``"history"``.
+    """
+    sid = str(t.get("id", ""))
+    status = t.get("status", "")
+    status_labels = {"pending": "Pending", "running": "Running", "completed": "Completed", "failed": "Failed"}
+    is_active = status in ("pending", "running")
+    summary = _task_summary(t)
+    # Only history cards show the parameter summary (cut to 72 chars plus "…"); others show a dash.
+    if mode == "history":
+        summary_short = ((summary[:72] + "…") if summary and len(summary) > 72 else summary) or "—"
+    else:
+        summary_short = "—"
+    card = task_list_card_markup(
+        task_id=sid,
+        type_label=_task_type_label(t.get("type", "")),
+        status=status,
+        status_label=status_labels.get(status, status),
+        time_str=_task_time_str(t),
+        duration=_task_duration(t) or "—",
+        summary_short=summary_short,
+        progress_pct=t.get("progress_pct"),
+        progress_message=(t.get("progress_message") or "").strip(),
+        mode=mode,
+    )
+    st.markdown(f'<div class="dl-task-stack">{card}</div>', unsafe_allow_html=True)
+
+    # Same Stop/Remove button in both layouts; dialog mode just adds a "View" column in front.
+    if use_dialog:
+        view_col, action_col, _spacer = st.columns([1.15, 1.15, 4])
+        with view_col:
+            st.button("View", key=f"view_{sid}", on_click=_open_task_detail, args=(sid,))
+    else:
+        action_col, _spacer = st.columns([1.15, 4])
+    with action_col:
+        action_help = (
+            "Cancels the Redis/RQ job when possible, then removes this row from the list."
+            if is_active
+            else "Remove this row from the task list."
+        )
+        if st.button("Stop" if is_active else "Remove", key=f"del_{sid}", type="secondary", help=action_help):
+            delete_task(sid, session_id=current_user)
+            st.rerun()
+
+    if not use_dialog:
+        with st.expander("More", expanded=False):
+            render_task_detail_content(t)
+
+
+def render_task_list(tasks: List[Dict[str, Any]], current_user: Optional[str]) -> bool:
+    """Draw active rows, a collapsed history section and any requested detail dialog; True if a task is active."""
+    if current_user:
+        st.caption(f"Logged in as **{current_user}** · your recent tasks only")
+    if not tasks:
+        render_task_list_empty_state()
+        return False
+
+    active, history = [], []
+    for task in tasks:
+        (active if task.get("status") in ("pending", "running") else history).append(task)
+    use_dialog = callable(getattr(st, "dialog", None))
+    for task in active:
+        _render_one_task_row(task, current_user, use_dialog, mode="active_compact")
+    if history:
+        with st.expander(f"Task history ({len(history)})", expanded=False):
+            for task in history:
+                _render_one_task_row(task, current_user, use_dialog, mode="history")
+
+    pending_detail_id = st.session_state.get("_task_detail_id") if use_dialog else None
+    if pending_detail_id:
+        try:
+            detail_task = get_task(pending_detail_id)
+            if detail_task:
+
+                @st.dialog("Task details", width="large")
+                def _task_detail_modal():
+                    render_task_detail_content(detail_task)
+                    if st.button("Close"):
+                        st.session_state.pop("_task_detail_id", None)
+                        st.rerun()
+
+                _task_detail_modal()
+        except Exception as exc:
+            st.error(f"Could not open task details: {exc}")
+        finally:
+            # One-shot flag: drop it so the next script run does not reopen the dialog.
+            st.session_state.pop("_task_detail_id", None)
+
+    return bool(active)
+
+
+def get_task_list_current_user() -> Optional[str]:
+    """Return ``get_current_user_id()`` when ``is_auth_enabled()`` is true, otherwise None."""
+    return get_current_user_id() if is_auth_enabled() else None
diff --git a/evaluation_dashboard_app/lib/ui/task_result_summary.py b/evaluation_dashboard_app/lib/ui/task_result_summary.py
new file mode 100644
index 0000000..b7e0038
--- /dev/null
+++ b/evaluation_dashboard_app/lib/ui/task_result_summary.py
@@ -0,0 +1,222 @@
+"""Shared task result-summary renderers used by background task pages."""
+
+from typing import Any, Dict, List, Optional
+
+import pandas as pd
+import streamlit as st
+
+
+def render_summary_table(rows: Optional[List[Dict[str, Any]]]) -> None:
+    """Show *rows* as a dataframe under a "Download Status" heading; nothing is drawn for empty rows."""
+    if rows:
+        try:
+            table = pd.DataFrame(rows)
+            st.subheader("Download Status")
+            st.dataframe(table, width="stretch")
+        except Exception:
+            # Best-effort: a table that cannot be built or drawn is silently left out.
+            return
+
+
+def render_task_result_summary(summary: Dict[str, Any]) -> None:
+    """Render a result summary block from task result_summary JSON.
+
+    Dispatches on ``summary["job"]``; an unrecognised job type is shown as raw JSON.
+    ``download_summary`` / ``errors`` stored as JSON null are treated as empty instead of raising.
+    """
+    job = summary.get("job", "")
+    if job == "download_results":
+        total = summary.get("total", 0)
+        success = summary.get("success", 0)
+        failed = summary.get("failed", 0)
+        st.subheader("Summary")
+        st.write(f"- Total scenarios processed: **{total}**")
+        st.write(f"- Successfully downloaded: **{success}**")
+        if failed:
+            st.write(f"- Failed: **{failed}**")
+        st.write(f"- Output directory: `{summary.get('output_path', '')}`")
+        if success > 0:
+            st.info("To generate the final summary CSV files, go to the **Eval Results** tab and run the evaluation.")
+        render_summary_table(summary.get("rows"))
+    elif job == "download_scenarios":
+        total = summary.get("total", 0)
+        success = summary.get("success", 0)
+        failed = summary.get("failed", 0)
+        st.subheader("Summary")
+        st.write(f"- Total scenarios: **{total}**")
+        st.write(f"- Successfully downloaded: **{success}**")
+        if failed:
+            st.write(f"- Failed: **{failed}**")
+        st.write(f"- Result JSON files: **{total}** downloaded.")
+        st.write(f"- Output directory: `{summary.get('output_path', '')}`")
+        if success > 0:
+            st.info("To generate summary CSV files, go to the **Eval Results** tab and run the evaluation.")
+        render_summary_table(summary.get("rows"))
+    elif job == "run_eval_dirs":
+        dirs = summary.get("directories_processed", 0)
+        path = summary.get("summary_path", "")
+        srows = summary.get("summary_rows", 0)
+        scrows = summary.get("score_rows", 0)
+        st.subheader("Eval Summary")
+        st.write(f"- Directories processed: **{dirs}**")
+        st.write(f"- Generated Summary.csv (**{srows}** rows) and Score.csv (**{scrows}** rows) in `{path}`")
+    elif job == "generate_summary_csv":
+        path = summary.get("summary_path", "")
+        srows = summary.get("summary_rows", 0)
+        scrows = summary.get("score_rows", 0)
+        st.subheader("Summary")
+        st.write(f"- Generated Summary.csv (**{srows}** rows) and Score.csv (**{scrows}** rows) in `{path}`")
+    elif job == "build_parquet":
+        path = summary.get("output_path", "")
+        st.subheader("Summary")
+        st.write(f"- Output: `{path}`")
+    elif job == "download_and_eval":
+        dl_summary = summary.get("download_summary") or {}  # tolerate an explicit JSON null
+        eval_summary_data = summary.get("eval_summary", {})
+        parquet_path = summary.get("parquet_path", "")
+        errors = summary.get("errors") or []  # tolerate an explicit JSON null
+
+        st.subheader("Download + Eval + Parquet Summary")
+
+        dl_success = summary.get("download_success", False)
+        if dl_success:
+            st.write("✅ **Download: SUCCESS**")
+            st.write(
+                f"   - Total: **{dl_summary.get('total', 0)}**, "
+                f"Success: **{dl_summary.get('success', 0)}**, "
+                f"Failed: **{dl_summary.get('failed', 0)}**"
+            )
+        else:
+            st.write("❌ **Download: FAILED**")
+            for err in errors:
+                st.write(f"   - {err}")
+
+        if eval_summary_data:
+            st.write("✅ **Eval: SUCCESS**")
+            st.write(f"   - Directories processed: **{eval_summary_data.get('directories_processed', 0)}**")
+            st.write(
+                f"   - Summary.csv: **{eval_summary_data.get('summary_rows', 0)}** rows, "
+                f"Score.csv: **{eval_summary_data.get('score_rows', 0)}** rows"
+            )
+
+        if parquet_path:
+            st.write(f"✅ **Parquet: SUCCESS** → `{parquet_path}`")
+
+        if errors:
+            st.error("Errors during execution:")
+            for err in errors:
+                st.write(f"- {err}")
+    elif job == "run_evaluator_and_process":
+        evaluator_job_id = summary.get("evaluator_job_id", "")
+        evaluator_report_url = summary.get("evaluator_report_url", "")
+        evaluator_status = summary.get("evaluator_status", "unknown")
+        evaluator_build_status = summary.get("evaluator_build_status", "")
+        evaluator_test_status = summary.get("evaluator_test_status", "")
+        evaluator_fail_message = summary.get("evaluator_fail_message", "")
+        evaluator_case_totals = summary.get("evaluator_case_totals", {})
+        evaluator_suites = summary.get("evaluator_suites", [])
+        evaluator_failed_cases = summary.get("evaluator_failed_cases", [])
+        dl_summary = summary.get("download_summary") or {}  # read unconditionally below, so null must not crash
+        download_rows = summary.get("download_rows", [])
+        eval_summary_data = summary.get("eval_summary", {})
+        parquet_path = summary.get("parquet_path", "")
+
+        st.subheader("Run Evaluator + Download + Eval + Parquet Summary")
+
+        st.write("🎯 **Evaluator**")
+        st.write(f"   - Job ID: `{evaluator_job_id}`")
+        st.write(f"   - Status: **{evaluator_status}**")
+        if evaluator_build_status:
+            st.write(f"   - Build: **{evaluator_build_status}**")
+        if evaluator_test_status:
+            st.write(f"   - Test: **{evaluator_test_status}**")
+        if evaluator_case_totals:
+            st.write(
+                "   - Case results: "
+                f"**{evaluator_case_totals.get('success', 0)}** success, "
+                f"**{evaluator_case_totals.get('failed', 0)}** failed, "
+                f"**{evaluator_case_totals.get('canceled', 0)}** canceled "
+                f"(total **{evaluator_case_totals.get('total', 0)}**)"
+            )
+        if evaluator_fail_message:
+            st.write(f"   - Message: `{evaluator_fail_message}`")
+        if evaluator_report_url:
+            st.markdown(f"   - Report: [Open]({evaluator_report_url})")
+        if evaluator_suites:
+            st.caption("Evaluator suite summary")
+            st.dataframe(pd.DataFrame(evaluator_suites), width="stretch", hide_index=True)
+        if evaluator_failed_cases:
+            st.caption("Failed cases from evaluator")
+            st.dataframe(pd.DataFrame(evaluator_failed_cases), width="stretch", hide_index=True)
+
+        dl_total = dl_summary.get("total", 0)
+        dl_success = dl_summary.get("success", 0)
+        dl_failed = dl_summary.get("failed", 0)
+        st.write("📥 **Download**")
+        st.write(f"   - Total: **{dl_total}**, Success: **{dl_success}**, Failed: **{dl_failed}**")
+        render_summary_table(download_rows)  # draws nothing when there are no rows
+
+        if eval_summary_data:
+            st.write("🧮 **Evaluation**")
+            st.write(f"   - Directories processed: **{eval_summary_data.get('directories_processed', 0)}**")
+            st.write(
+                f"   - Success: **{eval_summary_data.get('success', 0)}**, "
+                f"Failed: **{eval_summary_data.get('failed', 0)}**"
+            )
+            st.write(
+                f"   - Summary.csv: **{eval_summary_data.get('summary_rows', 0)}** rows, "
+                f"Score.csv: **{eval_summary_data.get('score_rows', 0)}** rows"
+            )
+
+        if parquet_path:
+            st.write("📦 **Parquet**")
+            st.write(f"   - Output: `{parquet_path}`")
+
+        if evaluator_report_url:
+            st.markdown(f"### [📊 View Evaluator Report]({evaluator_report_url})")
+    elif job == "run_release_specsheet_workflow":
+        st.subheader("Release Specsheet Summary")
+        st.write(f"📁 **Release root:** `{summary.get('release_root', '')}`")
+        st.write(f"🏷️ **Version:** `{summary.get('version', '')}`")
+        evaluator_jobs = summary.get("evaluator_jobs", {})
+        if evaluator_jobs:
+            rows = []
+            for role, payload in evaluator_jobs.items():
+                rows.append(
+                    {
+                        "role": role,
+                        "job_id": payload.get("job_id", ""),
+                        "status": payload.get("status", ""),
+                        "catalog_id": payload.get("catalog_id", ""),
+                        "suite_count": payload.get("suite_count", ""),
+                        "description": payload.get("description", ""),
+                        "report_url": payload.get("report_url", ""),
+                    }
+                )
+            st.dataframe(pd.DataFrame(rows), width="stretch", hide_index=True)
+        analysis_artifacts = summary.get("analysis_artifacts", {})
+        if analysis_artifacts:
+            st.write("🔎 **Detailed analysis artifacts:**")
+            rows = []
+            for role, payload in analysis_artifacts.items():
+                download = payload.get("download", {}) if isinstance(payload.get("download"), dict) else {}
+                eval_summary = payload.get("eval", {}) if isinstance(payload.get("eval"), dict) else {}
+                warnings = payload.get("warnings", []) if isinstance(payload.get("warnings"), list) else []
+                rows.append(
+                    {
+                        "role": role,
+                        "path": payload.get("path", ""),
+                        "download_success": download.get("success", ""),
+                        "download_total": download.get("total", ""),
+                        "summary_rows": eval_summary.get("summary_rows", ""),
+                        "score_rows": eval_summary.get("score_rows", ""),
+                        "parquet_path": payload.get("parquet_path", ""),
+                        "warnings": "; ".join(str(item) for item in warnings[:3]),  # first three warnings only
+                    }
+                )
+            st.dataframe(pd.DataFrame(rows), width="stretch", hide_index=True)
+        specsheet_pdf = summary.get("specsheet_pdf", "")
+        if specsheet_pdf:
+            st.write(f"✅ **Specsheet PDF:** `{specsheet_pdf}`")
+    else:
+        st.json(summary)
diff --git a/evaluation_dashboard_app/pages/10_Help.py b/evaluation_dashboard_app/pages/10_Help.py
index 8c9df7f..857b7b9 100644
--- a/evaluation_dashboard_app/pages/10_Help.py
+++ b/evaluation_dashboard_app/pages/10_Help.py
@@ -1,11 +1,9 @@
-import json
 import re
-import uuid
 from pathlib import Path
 
 import streamlit as st
-import streamlit.components.v1 as components
 
+from lib.mermaid_render import render_mermaid
 from lib.page_chrome import inject_app_page_styles, render_page_hero
 
 st.set_page_config(
@@ -18,37 +16,17 @@
 render_page_hero(
     kicker="Documentation",
     title="Help & guide",
-    description="In-app copy of the project README — setup, pages, and workflows for the evaluation dashboard.",
+    description="In-app copy of the project README with a simple Japanese / English switch.",
     mode="Single Run",
 )
 
 # Streamlit markdown does not run Mermaid; split fenced ```mermaid blocks and render via Mermaid.js.
 MERMAID_FENCE = re.compile(r"```mermaid\s*\n([\s\S]*?)```", re.IGNORECASE)
 IMAGE_PATTERN = re.compile(r"!\[(.*?)\]\((.*?)\)")
-
-
-def _render_mermaid(definition: str) -> None:
-    """Render a Mermaid diagram inside an HTML component (CDN script)."""
-    defn_json = json.dumps(definition.strip())
-    uid = uuid.uuid4().hex[:12]
-    html = f"""
-<div id="mermaid-host-{uid}" style="overflow:auto;max-width:100%;padding:0.25rem 0;"></div>
-<script src="https://cdn.jsdelivr.net/npm/mermaid@10.9.0/dist/mermaid.min.js"></script>
-<script>
-(function() {{
-  const defn = {defn_json};
-  const host = document.getElementById("mermaid-host-{uid}");
-  mermaid.initialize({{ startOnLoad: false, theme: "neutral", securityLevel: "loose" }});
-  const graphId = "mermaid-graph-{uid}";
-  mermaid.render(graphId, defn).then(function(res) {{
-    host.innerHTML = res.svg;
-  }}).catch(function(err) {{
-    host.textContent = "Mermaid diagram could not be rendered: " + String(err);
-  }});
-}})();
-</script>
-"""
-    components.html(html, height=480, scrolling=True)
+README_FILES = {  # radio label -> README path (relative, so resolved against the working directory)
+    "Japanese": Path("Readme.md"),
+    "English": Path("Readme.en.md"),
+}
 
 
 def _render_markdown_with_images(chunk: str) -> None:
@@ -69,11 +47,22 @@ def _render_markdown_with_images(chunk: str) -> None:
             break
 
 
-readme_path = Path("Readme.md")
-content = readme_path.read_text(encoding="utf-8")
+language = st.radio(  # language switch; the chosen label indexes README_FILES below
+    "README language",
+    options=list(README_FILES.keys()),
+    horizontal=True,
+    label_visibility="collapsed",
+)
+
+selected_readme_path = README_FILES[language]
+if not selected_readme_path.exists():  # show an error and stop instead of failing in read_text()
+    st.error(f"README file not found: {selected_readme_path}")
+    st.stop()
+
+content = selected_readme_path.read_text(encoding="utf-8")
 
 for idx, piece in enumerate(MERMAID_FENCE.split(content)):
     if idx % 2 == 0:
         _render_markdown_with_images(piece)
     else:
-        _render_mermaid(piece)
+        render_mermaid(piece)
diff --git a/evaluation_dashboard_app/pages/11_T4_Dataset_Server.py b/evaluation_dashboard_app/pages/11_T4_Dataset_Server.py
new file mode 100644
index 0000000..7297d87
--- /dev/null
+++ b/evaluation_dashboard_app/pages/11_T4_Dataset_Server.py
@@ -0,0 +1,447 @@
+"""
+Exercise the T4 visualizer HTTP API (``t4-server``): ``GET /health``, ``GET /datasets``,
+``GET /datasets/{t4dataset_id}/scenarios``, and ``POST /render``.
+Build embeddable JSON / query strings for T4 dataset context and render payloads.
+"""
+from __future__ import annotations
+
+import json
+import os
+from typing import Any, List, Optional
+
+import pandas as pd
+import streamlit as st
+
+from lib.page_chrome import inject_app_page_styles, render_page_hero, section_header
+from lib.t4_dataset_embed import (
+    build_render_request_embed,
+    t4_dataset_context,
+    t4_share_query_params,
+    target_objects_from_rows,
+)
+from lib.t4_visualizer_client import (
+    DEFAULT_BASE_URL,
+    ENV_BASE_URL,
+    RenderRequest,
+    T4VisualizerClient,
+    T4VisualizerError,
+    TargetObjectIn,
+    render_request_to_json_body,
+    render_response_json_for_debug,
+    target_object_from_gt_row,
+)
+
+st.set_page_config(
+    page_title="T4 dataset server",
+    page_icon="📡",
+    layout="wide",
+    initial_sidebar_state="expanded",
+)
+inject_app_page_styles()
+
+render_page_hero(
+    kicker="Integration",
+    title="T4 dataset server & embed helpers",
+    description=(
+        "Call the Tier4 visualizer HTTP service (same client as Bounding Box Viewer): health, dataset list, "
+        "scenarios per dataset (names and frame counts), camera render. Fetch lists, pick ids from the server "
+        "or type your own, then render or copy embed JSON."
+    ),
+    mode="Single Run",
+)
+
+if "t4_test_base_url" not in st.session_state:  # seed once: env var if set, else the client default
+    st.session_state["t4_test_base_url"] = os.environ.get(ENV_BASE_URL, DEFAULT_BASE_URL).rstrip("/")
+
+# Cached API results for the pickers; filled by the "GET /datasets" and "GET …/scenarios" buttons below.
+if "t4_dataset_ids" not in st.session_state:
+    st.session_state["t4_dataset_ids"] = []
+if "t4_last_datasets_payload" not in st.session_state:
+    st.session_state["t4_last_datasets_payload"] = None
+if "t4_scenario_rows" not in st.session_state:
+    st.session_state["t4_scenario_rows"] = []
+if "t4_last_scenarios_payload" not in st.session_state:
+    st.session_state["t4_last_scenarios_payload"] = None
+
+
+def _hydrate_t4_from_url() -> None:
+    """Fill context + render/embed widgets from ``?render_json=…`` (same JSON as curl ``-d``).
+
+    Each distinct payload is applied once per session (tracked via ``_t4_hydrate_sig``);
+    invalid JSON or a non-object payload is ignored. JSON ``null`` ids become "" rather
+    than the literal string "None".
+    """
+    raw = st.query_params.get("render_json")
+    if isinstance(raw, list):
+        raw = raw[0] if raw else None
+    if not raw:
+        return
+    sig = f"render_json:{raw}"
+    if st.session_state.get("_t4_hydrate_sig") == sig:
+        return
+    try:
+        body = json.loads(str(raw))
+    except json.JSONDecodeError:
+        return
+    if not isinstance(body, dict):
+        return
+    # Text fields: a missing or null value clears the field (same rule for all three).
+    text_fields = (("t4_ctx_ds", "t4dataset_id"), ("t4_ctx_scen", "scenario_name"), ("t4_ctx_ver", "version"))
+    for state_key, body_key in text_fields:
+        value = body.get(body_key)
+        st.session_state[state_key] = "" if value is None else str(value)
+    try:
+        st.session_state["t4_ctx_frame"] = int(body.get("frame_index", 0))
+    except (TypeError, ValueError, OverflowError):  # OverflowError: e.g. JSON ``Infinity``
+        st.session_state["t4_ctx_frame"] = 0
+    targets = body.get("target_objects")
+    targets = targets if isinstance(targets, list) else []
+    targets_json = json.dumps(targets, ensure_ascii=False, indent=2)
+    st.session_state["t4_emb_rows"] = targets_json
+    st.session_state["t4_render_targets"] = targets_json
+    st.session_state["t4_render_use_tgt"] = bool(targets)
+    st.session_state["t4_render_crop"] = bool(body.get("crop_cameras", False))
+    st.session_state["t4_render_ann"] = bool(body.get("show_annotations", True))
+    # Remember the applied payload so later reruns skip it (see the signature check above).
+    st.session_state["_t4_hydrate_sig"] = sig
+
+
+_hydrate_t4_from_url()
+
+base_url = st.sidebar.text_input(
+    "Server base URL",
+    key="t4_test_base_url",  # same session key that is seeded from the environment above
+    help=f"Override env {ENV_BASE_URL} for this session.",
+)
+timeout_s = st.sidebar.number_input("HTTP timeout (s)", min_value=5.0, max_value=600.0, value=120.0, step=5.0)
+
+
+def _client() -> T4VisualizerClient:  # new client per call from the sidebar URL/timeout; blank URL -> DEFAULT_BASE_URL
+    return T4VisualizerClient(base_url=(base_url or "").strip() or DEFAULT_BASE_URL, timeout=float(timeout_s))
+
+
+def _bash_single_quoted(s: str) -> str:
+    """Return *s* as one bash single-quoted word; each embedded ``'`` becomes ``'"'"'``."""
+    return "'" + "'\"'\"'".join(s.split("'")) + "'"
+
+
+def _on_dataset_pick() -> None:
+    """Copy ``t4_pick_ds`` from session state into ``t4_ctx_ds`` unless it is the "—" placeholder."""
+    if (choice := st.session_state.get("t4_pick_ds", "—")) != "—":
+        st.session_state["t4_ctx_ds"] = choice
+
+
+def _on_scenario_pick() -> None:
+    """Copy ``t4_pick_scen`` from session state into ``t4_ctx_scen`` unless it is the "—" placeholder."""
+    if (choice := st.session_state.get("t4_pick_scen", "—")) != "—":
+        st.session_state["t4_ctx_scen"] = choice
+
+
+# --- Shared context (dataset, version, scenario, frame) ---------------------------------
+section_header(
+    "Context",
+    "Fetch lists from the server, then choose **t4dataset_id** and **scenario_name** from the dropdowns "
+    "or type any value in the text fields.",
+)
+
+# Two fetch buttons on the left; the wider third column shows the raw JSON of the last responses.
+row_fetch = st.columns([1, 1, 2])
+with row_fetch[0]:
+    if st.button("GET /datasets", type="primary", key="t4_btn_datasets"):
+        try:
+            d = _client().list_datasets()
+            st.session_state["t4_last_datasets_payload"] = d
+            ds = d.get("datasets")
+            # A non-list "datasets" value is stored as an empty id list.
+            st.session_state["t4_dataset_ids"] = [str(x) for x in ds] if isinstance(ds, list) else []
+            # Fetching datasets discards the previously fetched scenario rows and payload.
+            st.session_state["t4_scenario_rows"] = []
+            st.session_state["t4_last_scenarios_payload"] = None
+            st.success(f"OK — {len(st.session_state['t4_dataset_ids'])} dataset id(s).")
+        except T4VisualizerError as ex:
+            st.error(f"{ex} (status={ex.status_code})")
+            if ex.response_text:
+                # Show at most the first 4000 characters of the server response.
+                st.code(ex.response_text[:4000], language="text")
+        except OSError as ex:
+            st.error(f"Network error: {ex}")
+
+with row_fetch[1]:
+    if st.button("GET /datasets/…/scenarios", type="primary", key="t4_btn_scenarios"):
+        # Scenarios are listed for the dataset id currently in the text field.
+        _tid = (st.session_state.get("t4_ctx_ds") or "").strip()
+        if not _tid:
+            st.warning("Set **t4dataset_id** first.")
+        else:
+            try:
+                # A blank version field is sent as ``None``.
+                _ver = (st.session_state.get("t4_ctx_ver") or "").strip() or None
+                out = _client().list_dataset_scenarios(_tid, version=_ver)
+                st.session_state["t4_last_scenarios_payload"] = out
+                rows = out.get("scenarios")
+                st.session_state["t4_scenario_rows"] = rows if isinstance(rows, list) else []
+                st.success(f"OK — {len(st.session_state['t4_scenario_rows'])} scenario(s).")
+            except T4VisualizerError as ex:
+                st.error(f"{ex} (status={ex.status_code})")
+                if ex.response_text:
+                    st.code(ex.response_text[:4000], language="text")
+            except OSError as ex:
+                st.error(f"Network error: {ex}")
+
+with row_fetch[2]:
+    # Collapsed expanders with the last raw payloads, shown only once a fetch has succeeded.
+    if st.session_state.get("t4_last_datasets_payload") is not None:
+        with st.expander("Last GET /datasets JSON", expanded=False):
+            st.json(st.session_state["t4_last_datasets_payload"])
+    if st.session_state.get("t4_last_scenarios_payload") is not None:
+        with st.expander("Last GET /datasets/…/scenarios JSON", expanded=False):
+            st.json(st.session_state["t4_last_scenarios_payload"])
+
+# Dropdown options: the placeholder "—" followed by the sorted values from the last fetch.
+# NOTE(review): both session keys are indexed directly, so they are presumably initialised
+# earlier in this file — confirm.
+_ids = st.session_state["t4_dataset_ids"]
+_ds_options = ["—"] + sorted(_ids)
+_name_rows = st.session_state["t4_scenario_rows"]
+_scen_names: List[str] = []
+for r in _name_rows:
+    # Rows that are not dicts or carry no "name" are left out of the dropdown.
+    if isinstance(r, dict) and r.get("name") is not None:
+        _scen_names.append(str(r["name"]))
+_scen_options = ["—"] + sorted(set(_scen_names))
+
+c1, c2, c3, c4 = st.columns(4)
+with c1:
+    # Picking an entry copies it into the ``t4_ctx_ds`` text field via the on_change callback.
+    st.selectbox(
+        "Pick dataset (from last /datasets)",
+        options=_ds_options,
+        key="t4_pick_ds",
+        on_change=_on_dataset_pick,
+        help="Choose a server-reported id, or leave as — and type below.",
+    )
+    st.text_input(
+        "t4dataset_id",
+        key="t4_ctx_ds",
+        placeholder="uuid or folder id",
+    )
+with c2:
+    st.text_input(
+        "version (optional)",
+        key="t4_ctx_ver",
+        help="Annotation dir version; passed to scenarios and render when non-empty.",
+    )
+with c3:
+    # Same pattern as the dataset picker, targeting the ``t4_ctx_scen`` text field.
+    st.selectbox(
+        "Pick scenario (from last /scenarios)",
+        options=_scen_options,
+        key="t4_pick_scen",
+        on_change=_on_scenario_pick,
+        help="Choose **name** from the server, or type any scenario below.",
+    )
+    st.text_input(
+        "scenario_name",
+        key="t4_ctx_scen",
+        placeholder="scene name for POST /render",
+    )
+with c4:
+    st.number_input("frame_index", min_value=0, value=0, step=1, key="t4_ctx_frame")
+
+# Show the fetched scenario rows as a table so valid frame ranges can be looked up.
+if _name_rows:
+    st.caption(
+        "Valid **frame_index** for each scene is **0 … nbr_samples − 1** (see table). "
+        "Use **Render & embed** to request PNGs."
+    )
+    st.dataframe(pd.DataFrame(_name_rows), width='stretch', hide_index=True)
+
+st.divider()
+
+# Two tabs: a liveness check, and the render / embed tooling that uses the context above.
+tab_overview, tab_render = st.tabs(["Overview", "Render & embed JSON"])
+
+with tab_overview:
+    section_header("/health", "GET — server liveness.")
+    if st.button("GET /health", type="primary", key="t4_btn_health"):
+        try:
+            h = _client().health()
+            st.success("OK")
+            st.json(h)
+        except T4VisualizerError as ex:
+            st.error(f"{ex} (status={ex.status_code})")
+            if ex.response_text:
+                # Show at most the first 4000 characters of the server response.
+                st.code(ex.response_text[:4000], language="text")
+        except OSError as ex:
+            st.error(f"Network error: {ex}")
+
+with tab_render:
+    section_header("POST /render", "Request camera PNGs; optional ``target_objects`` from JSON below.")
+    # Snapshot of the shared context fields, normalised (stripped text, int frame index).
+    ds_id = (st.session_state.get("t4_ctx_ds") or "").strip()
+    scen = (st.session_state.get("t4_ctx_scen") or "").strip()
+    frame = int(st.session_state.get("t4_ctx_frame") or 0)
+    ver_raw = (st.session_state.get("t4_ctx_ver") or "").strip()
+    # A blank version is represented as ``None``.
+    version_opt: Optional[str] = ver_raw if ver_raw else None
+
+    st.caption(
+        f"Using context: **t4dataset_id**=`{ds_id or '…'}` · **scenario_name**=`{scen or '…'}` · "
+        f"**frame_index**={frame}"
+        + (f" · **version**=`{version_opt}`" if version_opt else "")
+    )
+
+    tgt_json = st.text_area(
+        "target_objects (JSON array, optional)",
+        value="[]",
+        height=140,
+        key="t4_render_targets",
+        help="List of objects with uuid/x/y/z/label/width/length/height/yaw (matches GT row shape).",
+    )
+    # Request options; ``overlay_gt`` decides whether the JSON above is parsed and sent at all.
+    o1, o2, o3 = st.columns(3)
+    with o1:
+        crop = st.checkbox("crop_cameras", value=False, key="t4_render_crop")
+    with o2:
+        show_ann = st.checkbox("show_annotations", value=True, key="t4_render_ann")
+    with o3:
+        overlay_gt = st.checkbox("Use target_objects in request", value=True, key="t4_render_use_tgt")
+
+    req: Optional[RenderRequest] = None
+    parse_err: Optional[str] = None
+    # Targets stay empty unless the checkbox is ticked and the JSON parses cleanly.
+    objs: List[TargetObjectIn] = []
+    if overlay_gt:
+        try:
+            raw = json.loads(tgt_json or "[]")
+        except json.JSONDecodeError as ex:
+            parse_err = f"Invalid JSON: {ex}"
+        else:
+            if not isinstance(raw, list):
+                parse_err = "target_objects JSON must be an array"
+            else:
+                for idx, item in enumerate(raw):
+                    if not isinstance(item, dict):
+                        parse_err = "each target must be an object"
+                        break
+                    try:
+                        d = target_object_from_gt_row(item)
+                        objs.append(TargetObjectIn(**d))
+                    except (TypeError, ValueError, KeyError) as ex:
+                        # The text area is free-form user input, so a row with missing,
+                        # unexpected or badly typed fields must become a warning instead of
+                        # an unhandled exception that takes down the whole page.
+                        # NOTE(review): assumes the conversion helpers signal bad rows with
+                        # one of these exception types — confirm against their definitions.
+                        parse_err = f"target_objects[{idx}] is invalid: {ex}"
+                        break
+
+    # Build the request once for both modes. On a parse error ``req`` stays ``None``, which
+    # keeps the POST button disabled.
+    if parse_err is None:
+        req = RenderRequest(
+            t4dataset_id=ds_id,
+            scenario_name=scen,
+            frame_index=frame,
+            target_objects=objs,
+            crop_cameras=crop,
+            show_annotations=show_ann,
+            version=version_opt,
+        )
+
+    if parse_err:
+        st.warning(parse_err)
+
+    # Button on the left, collapsed preview of the JSON body on the right.
+    col_go, col_prev = st.columns([1, 2])
+    with col_go:
+        do_render = st.button("POST /render", type="primary", key="t4_btn_render", disabled=req is None)
+    with col_prev:
+        if req is not None:
+            with st.expander("Request body preview", expanded=False):
+                st.json(render_request_to_json_body(req))
+
+    if do_render and req is not None:
+        try:
+            with st.spinner("Rendering…"):
+                res = _client().render(req)
+            imgs = res.decode_all_images()
+
+            # Caption: identifying fields first, then whichever timings the server reported.
+            cap_parts = [f"sample_token={res.sample_token!r}", f"timestamp_us={res.timestamp_us}"]
+            for timing_name in ("elapsed_ms", "tier4_load_ms", "render_ms"):
+                timing_value = getattr(res, timing_name)
+                if timing_value is not None:
+                    cap_parts.append(f"{timing_name}={timing_value}")
+            st.caption(" · ".join(cap_parts))
+
+            if res.raw_json is not None:
+                with st.expander("Response JSON (debug)", expanded=False):
+                    st.json(render_response_json_for_debug(res.raw_json))
+
+            if imgs:
+                # At most six images are shown side by side, one column each.
+                n = min(len(imgs), 6)
+                for col, (label, png) in zip(st.columns(n), imgs):
+                    col.image(png, caption=label, width='stretch')
+                if len(imgs) > n:
+                    st.caption(f"Showing first {n} of {len(imgs)} images.")
+            else:
+                st.info("No images in response.")
+        except T4VisualizerError as ex:
+            st.error(f"{ex} (status={ex.status_code})")
+            if ex.response_text:
+                st.code(ex.response_text[:4000], language="text")
+        except OSError as ex:
+            st.error(f"Network error: {ex}")
+
+    st.divider()
+    section_header(
+        "Embed helpers",
+        "Same **context** fields as above. Copy structured context, query strings, and full ``POST /render`` JSON.",
+    )
+
+    # Re-read the shared context fields for the embed snippets.
+    emb_ds = (st.session_state.get("t4_ctx_ds") or "").strip()
+    emb_scen = (st.session_state.get("t4_ctx_scen") or "").strip()
+    emb_frame = int(st.session_state.get("t4_ctx_frame") or 0)
+
+    emb_ta = st.text_area(
+        "Optional GT rows as JSON array (for target_objects_from_rows)",
+        value="[]",
+        height=120,
+        key="t4_emb_rows",
+    )
+
+    # Validate the text area: it must be a JSON array whose items are all objects.
+    # On any problem ``rows_list`` stays empty and a warning is shown.
+    rows_err: Optional[str] = None
+    rows_list: List[dict[str, Any]] = []
+    try:
+        parsed = json.loads(emb_ta or "[]")
+        if not isinstance(parsed, list):
+            rows_err = "Must be a JSON array"
+        else:
+            for i, row in enumerate(parsed):
+                if not isinstance(row, dict):
+                    rows_err = f"Item {i} is not an object"
+                    break
+            if rows_err is None:
+                rows_list = [r for r in parsed if isinstance(r, dict)]
+    except json.JSONDecodeError as ex:
+        rows_err = str(ex)
+
+    if rows_err:
+        st.warning(rows_err)
+
+    ctx = t4_dataset_context(emb_ds, emb_scen, frame_index=emb_frame)
+    emb_ver = (st.session_state.get("t4_ctx_ver") or "").strip()
+    # The embed payload reuses the render-tab checkbox values; an empty row list and a blank
+    # version are both passed as ``None``.
+    full = build_render_request_embed(
+        emb_ds,
+        emb_scen,
+        emb_frame,
+        target_rows=rows_list if rows_list else None,
+        show_annotations=bool(st.session_state.get("t4_render_ann", True)),
+        crop_cameras=bool(st.session_state.get("t4_render_crop", False)),
+        version=emb_ver if emb_ver else None,
+    )
+    # Trailing slashes are removed from a typed base URL so the path joins with a single "/".
+    viz_base = (base_url or "").strip().rstrip("/") or DEFAULT_BASE_URL
+    q = t4_share_query_params(emb_ds, emb_scen, frame_index=emb_frame)
+    render_get_url = f"{viz_base}/render?{q}"
+
+    st.subheader("Render GET URL")
+    st.caption(
+        "GET-style URL on the **visualizer server** (same **Server base URL** as API calls). "
+        "Requires **GET /render** with ``t4dataset_id``, ``scenario_name``, ``frame_index``; otherwise use **curl** (POST JSON) below."
+    )
+    st.markdown(f"[{render_get_url}]({render_get_url})")
+
+    if rows_list:
+        st.subheader("target_objects_from_rows (preview)")
+        st.json(target_objects_from_rows(rows_list))
+
+
+    # Drop trailing slashes (as the GET URL does) so the endpoint is never ".../" + "/render",
+    # and quote the URL: it is user-typed text that ends up in a copy-pasted shell command.
+    curl_base = (base_url or "").strip().rstrip("/") or DEFAULT_BASE_URL
+    curl_url = _bash_single_quoted(f"{curl_base}/render")
+    body_pretty = json.dumps(full["post_render_json"], indent=2, ensure_ascii=False)
+    curl_lines = (
+        f"curl -sS {curl_url} \\\n"
+        f"  -H 'Content-Type: application/json' \\\n"
+        f"  -d {_bash_single_quoted(body_pretty)}"
+    )
+    st.subheader("curl")
+    st.code(curl_lines, language="bash")
diff --git a/evaluation_dashboard_app/pages/12_Prediction_Evaluation.py b/evaluation_dashboard_app/pages/12_Prediction_Evaluation.py
new file mode 100644
index 0000000..515990c
--- /dev/null
+++ b/evaluation_dashboard_app/pages/12_Prediction_Evaluation.py
@@ -0,0 +1,1038 @@
+from __future__ import annotations
+
+import json
+from pathlib import Path
+from typing import Callable
+
+import numpy as np
+import pandas as pd
+import plotly.express as px
+import plotly.graph_objects as go
+import pyarrow.parquet as pq
+import streamlit as st
+
+from lib.overview_url_hydrate import try_hydrate_session_from_overview_query_params
+from lib.page_chrome import inject_app_page_styles, render_loaded_data_section, render_page_hero, section_header
+from lib.path_utils import get_run_display_name, list_run_directories, path_display
+from lib.prediction_eval import build_specsheet_aligned_prediction_artifacts
+
+
+# Page configuration is the first Streamlit call of this page.
+st.set_page_config(
+    layout="wide",
+    page_title="Prediction Evaluation",
+    page_icon="🧭",
+    initial_sidebar_state="expanded",
+)
+inject_app_page_styles()
+# Page-local CSS for the chip row (``pred-chip*``) and the stat cards (``pred-card*``)
+# that are emitted as raw HTML further down.
+st.markdown(
+    """
+    <style>
+    .pred-chip-row {
+        display:flex;
+        flex-wrap:wrap;
+        gap:0.55rem;
+        margin:0.35rem 0 1.0rem 0;
+    }
+    .pred-chip {
+        border:1px solid #d6dee7;
+        border-radius:999px;
+        padding:0.38rem 0.8rem;
+        background:linear-gradient(180deg, #ffffff 0%, #f8fbfc 100%);
+        color:#254051;
+        font-size:0.82rem;
+        font-weight:600;
+    }
+    .pred-card {
+        border:1px solid #dce6ee;
+        border-radius:18px;
+        background:linear-gradient(145deg, #fcfefe 0%, #f6fafb 48%, #f7fbff 100%);
+        padding:1rem 1.1rem;
+        box-shadow:0 18px 45px -28px rgba(13, 45, 58, 0.28);
+        min-height:128px;
+    }
+    .pred-card-kicker {
+        font-size:0.68rem;
+        letter-spacing:0.14em;
+        text-transform:uppercase;
+        color:#5b7283;
+        font-weight:800;
+    }
+    .pred-card-value {
+        font-size:1.8rem;
+        line-height:1.05;
+        letter-spacing:-0.04em;
+        color:#0f172a;
+        font-weight:850;
+        margin-top:0.5rem;
+    }
+    .pred-card-note {
+        margin-top:0.55rem;
+        color:#4a6577;
+        font-size:0.88rem;
+        line-height:1.45;
+    }
+    </style>
+    """,
+    unsafe_allow_html=True,
+)
+
+# Named palette for the charts on this page.
+PLOTLY_COLORS = dict(
+    ink="#12344d",
+    teal="#0f766e",
+    blue="#1d4ed8",
+    amber="#c27803",
+    rose="#be123c",
+    slate="#475569",
+)
+# Topic kept when the source data carries a ``topic_name`` column.
+DEFAULT_TOPIC = "perception.object_recognition.objects"
+# Horizons in seconds; the metric column names below are derived from them.
+CHECKPOINTS = (1.0, 3.0, 5.0)
+METRIC_ORDER = [
+    f"{metric}@{int(horizon)}s"
+    for metric in ("minADE", "minFDE")
+    for horizon in CHECKPOINTS
+]
+
+# On-disk summary cache, stored inside each run directory.
+APP_CACHE_ROOT = ".dashboard_cache"
+ARTIFACT_DIRNAME = "prediction_eval_cache"
+PREDICTION_CACHE_VERSION = 4
+ARTIFACT_TABLES = ["label_summary", "distance_summary", "polar_summary"]
+
+# Polar binning: radial bins of R_STEP from R_INI to R_MAX, angular bins of THETA_STEP
+# covering one full turn starting at THETA_INI.
+R_MAX, R_STEP, R_INI = 200, 20, 0
+THETA_STEP, THETA_INI = 60, -60
+THETA_MAX = THETA_INI + 360
+R_EDGES = np.arange(R_INI, R_MAX + R_STEP, R_STEP)
+THETA_EDGES_DEG = np.arange(THETA_INI, THETA_MAX + THETA_STEP, THETA_STEP)
+R_LABELS = [f"{lower}-{lower + R_STEP}" for lower in range(R_INI, R_MAX, R_STEP)]
+THETA_LABELS = [f"{lower}-{lower + THETA_STEP}" for lower in range(THETA_INI, THETA_MAX, THETA_STEP)]
+# Display order for distance bins labelled with a unit: every radial bin plus an overflow bin.
+DISTANCE_BIN_ORDER = [f"{ring} m" for ring in R_LABELS] + [f"{R_MAX}+ m"]
+
+
+def render_stat_card(kicker: str, value: str, note: str) -> None:
+    """Emit one ``pred-card`` block with a kicker line, a headline value and a note.
+
+    The three strings are interpolated into the markup as-is and rendered with
+    ``unsafe_allow_html=True``.
+    """
+    card_html = f"""
+        <div class="pred-card">
+          <div class="pred-card-kicker">{kicker}</div>
+          <div class="pred-card-value">{value}</div>
+          <div class="pred-card-note">{note}</div>
+        </div>
+        """
+    st.markdown(card_html, unsafe_allow_html=True)
+
+
+def ordered_distance_bins(values: list[str] | pd.Index) -> list[str]:
+    """Return the distinct non-null bin names in *values* in display order.
+
+    Names listed in ``DISTANCE_BIN_ORDER`` or ``R_LABELS`` come first, in that order;
+    any other names follow, sorted by length and then alphabetically.
+    """
+    seen: set[str] = set()
+    for raw in values:
+        if pd.notna(raw):
+            seen.add(str(raw))
+    known = [name for name in (*DISTANCE_BIN_ORDER, *R_LABELS) if name in seen]
+    extras = sorted(seen.difference(known), key=lambda name: (len(name), name))
+    return [*known, *extras]
+
+
+def build_distance_ring_figure(metric_df: pd.DataFrame, label_order: list[str], metric_name: str) -> go.Figure:
+    """Draw one metric as concentric distance rings split into one angular sector per label.
+
+    Args:
+        metric_df: Long-form rows with ``label``, ``r`` (distance bin) and ``value`` columns.
+        label_order: Labels in the order their sectors are laid out around the circle.
+        metric_name: Used as the figure title.
+
+    Returns:
+        A figure with one ``Barpolar`` trace per distance ring. When there are no rings
+        the figure contains only the layout.
+    """
+    ring_order = ordered_distance_bins(metric_df["r"].tolist())
+    pivot = (
+        metric_df.pivot(index="label", columns="r", values="value")
+        .reindex(index=label_order)
+        .reindex(columns=ring_order)
+    )
+    sector_count = len(label_order)
+    theta_width = 360 / max(sector_count, 1)
+    theta_centers = [i * theta_width for i in range(sector_count)]
+
+    # Derive the colour range from finite cells only. ``np.nanmin``/``np.nanmax`` raise on a
+    # zero-size pivot and warn on an all-NaN one, so both cases fall back to 0..1 here
+    # (the same approach ``build_theta_ring_figure`` takes).
+    cells = pivot.to_numpy(dtype=float) if pivot.size else np.empty((0, 0), dtype=float)
+    finite = cells[np.isfinite(cells)]
+    if finite.size:
+        zmin, zmax = float(finite.min()), float(finite.max())
+    else:
+        zmin, zmax = 0.0, 1.0
+    if zmin == zmax:
+        # A flat range would collapse the colour scale.
+        zmax = zmin + 1.0
+
+    fig = go.Figure()
+    last_ring_idx = len(ring_order) - 1
+    for ring_idx, ring_name in enumerate(ring_order):
+        vals = pivot[ring_name].tolist()
+        fig.add_trace(
+            go.Barpolar(
+                # Each ring is one unit thick and starts at its own index via ``base``.
+                r=[1.0] * sector_count,
+                base=[ring_idx] * sector_count,
+                theta=theta_centers,
+                width=[theta_width * 0.92] * sector_count,
+                marker=dict(
+                    color=vals,
+                    colorscale="YlOrRd",
+                    cmin=zmin,
+                    cmax=zmax,
+                    line=dict(color="rgba(255,255,255,0.35)", width=1),
+                    # All rings share cmin/cmax, so only the last trace carries a colorbar.
+                    colorbar=dict(title="m") if ring_idx == last_ring_idx else None,
+                ),
+                customdata=np.array(
+                    [[label, ring_name, val] for label, val in zip(label_order, vals)],
+                    dtype=object,
+                ),
+                hovertemplate="label=%{customdata[0]}<br>distance=%{customdata[1]}<br>value=%{customdata[2]:.3f} m<extra></extra>",
+                showlegend=False,
+            )
+        )
+
+    fig.update_layout(
+        title=metric_name,
+        height=430,
+        margin=dict(l=10, r=10, t=55, b=10),
+        polar=dict(
+            radialaxis=dict(
+                tickmode="array",
+                tickvals=list(range(len(ring_order))),
+                ticktext=ring_order,
+                angle=90,
+                gridcolor="rgba(148,163,184,0.25)",
+            ),
+            angularaxis=dict(
+                tickmode="array",
+                tickvals=theta_centers,
+                ticktext=label_order,
+                rotation=90,
+                direction="clockwise",
+                gridcolor="rgba(148,163,184,0.20)",
+            ),
+            bgcolor="rgba(248,250,252,0.75)",
+        ),
+    )
+    return fig
+
+
+def build_theta_ring_figure(label_polar: pd.DataFrame, metric_name: str, label_name: str, value_col: str, *, delta_mode: bool) -> go.Figure:
+    """Draw a distance-by-angle ring map for one label.
+
+    Args:
+        label_polar: Long-form rows with ``r``, ``theta`` and *value_col* columns.
+        metric_name: Accepted for signature parity; not used in the figure.
+        label_name: Figure title (suffixed with ``(B - A)`` in delta mode).
+        value_col: Column plotted as the cell colour.
+        delta_mode: Use a symmetric range around zero with the ``RdBu`` scale and a
+            signed hover value instead of the min..max range with ``YlOrRd``.
+
+    Returns:
+        A figure with one ``Barpolar`` trace per radial bin present in *label_polar*.
+    """
+    sectors = THETA_LABELS
+    rings_present = set(label_polar["r"].astype(str))
+    rings = [ring for ring in R_LABELS if ring in rings_present]
+    grid = label_polar.pivot(index="r", columns="theta", values=value_col).reindex(index=rings, columns=sectors)
+    sector_span = 360 / max(len(sectors), 1)
+    sector_angles = [idx * sector_span for idx in range(len(sectors))]
+
+    # Colour range from the finite cells only.
+    cells = grid.values.astype(float) if grid.size else np.array([[0.0]])
+    finite = cells[np.isfinite(cells)]
+    if finite.size == 0:
+        color_min, color_max = (-1.0, 1.0) if delta_mode else (0.0, 1.0)
+    elif delta_mode:
+        # Symmetric around zero; an all-zero grid still gets a non-degenerate range.
+        bound = float(np.nanmax(np.abs(finite))) or 1.0
+        color_min, color_max = -bound, bound
+    else:
+        color_min, color_max = float(np.nanmin(finite)), float(np.nanmax(finite))
+        if color_min == color_max:
+            color_max = color_min + 1.0
+
+    if delta_mode:
+        palette = "RdBu"
+        hover = "theta=%{customdata[0]}<br>distance=%{customdata[1]}<br>Δ=%{customdata[2]:+.3f} m<extra></extra>"
+        title_suffix = " (B - A)"
+    else:
+        palette = "YlOrRd"
+        hover = "theta=%{customdata[0]}<br>distance=%{customdata[1]}<br>value=%{customdata[2]:.3f} m<extra></extra>"
+        title_suffix = ""
+
+    fig = go.Figure()
+    outermost = len(rings) - 1
+    for ring_idx, ring_name in enumerate(rings):
+        ring_vals = grid.loc[ring_name].tolist()
+        hover_rows = [[sector, ring_name, cell] for sector, cell in zip(sectors, ring_vals)]
+        ring_marker = dict(
+            color=ring_vals,
+            colorscale=palette,
+            cmin=color_min,
+            cmax=color_max,
+            line=dict(color="rgba(255,255,255,0.32)", width=1),
+            # Only the outermost ring carries the shared colorbar.
+            colorbar=dict(title="m") if ring_idx == outermost else None,
+        )
+        fig.add_trace(
+            go.Barpolar(
+                r=[1.0] * len(sectors),
+                base=[ring_idx] * len(sectors),
+                theta=sector_angles,
+                width=[sector_span * 0.92] * len(sectors),
+                marker=ring_marker,
+                customdata=np.array(hover_rows, dtype=object),
+                hovertemplate=hover,
+                showlegend=False,
+            )
+        )
+
+    radial_axis = dict(
+        tickmode="array",
+        tickvals=list(range(len(rings))),
+        ticktext=rings,
+        angle=90,
+        gridcolor="rgba(148,163,184,0.22)",
+    )
+    angular_axis = dict(
+        tickmode="array",
+        tickvals=sector_angles,
+        ticktext=sectors,
+        rotation=90,
+        direction="clockwise",
+        gridcolor="rgba(148,163,184,0.18)",
+    )
+    fig.update_layout(
+        title=f"{label_name}{title_suffix}",
+        height=320,
+        margin=dict(l=10, r=10, t=45, b=10),
+        polar=dict(radialaxis=radial_axis, angularaxis=angular_axis, bgcolor="rgba(248,250,252,0.75)"),
+    )
+    return fig
+
+
+def render_compare_stat_card(kicker: str, a_value: float | None, b_value: float | None, note: str) -> None:
+    """Emit a ``pred-card`` comparing run A and run B for one metric.
+
+    Args:
+        kicker: Small heading of the card.
+        a_value: Metric for run A; ``None`` or NaN is shown as ``n/a``.
+        b_value: Metric for run B; ``None`` or NaN is shown as ``n/a``.
+        note: Text placed under the delta line (inserted into the markup as-is).
+
+    The delta line shows ``B - A`` when both values are present, otherwise ``Δ n/a``.
+    """
+    a_present = a_value is not None and pd.notna(a_value)
+    b_present = b_value is not None and pd.notna(b_value)
+    # Format defensively: applying ``:.2f`` to ``None`` raises TypeError, and the
+    # signature explicitly allows missing values.
+    a_text = f"{float(a_value):.2f}" if a_present else "n/a"
+    b_text = f"{float(b_value):.2f}" if b_present else "n/a"
+    if a_present and b_present:
+        delta_text = f"Δ {float(b_value) - float(a_value):+.2f} m"
+    else:
+        delta_text = "Δ n/a"
+    st.markdown(
+        f"""
+        <div class="pred-card">
+          <div class="pred-card-kicker">{kicker}</div>
+          <div class="pred-card-value">A {a_text} / B {b_text}</div>
+          <div class="pred-card-note">{delta_text}<br>{note}</div>
+        </div>
+        """,
+        unsafe_allow_html=True,
+    )
+
+
+def _run_has_prediction_source(run_path: Path) -> bool:
+    """Return True when *run_path* contains ``future.parquet`` or ``future.csv``."""
+    return any((run_path / filename).exists() for filename in ("future.parquet", "future.csv"))
+
+
+def _prediction_source_path(run_path: Path) -> Path | None:
+    """Return the prediction source file of a run, or ``None`` if it has none.
+
+    ``future.csv`` takes precedence over ``future.parquet`` when both exist.
+    """
+    for filename in ("future.csv", "future.parquet"):
+        candidate = run_path / filename
+        if candidate.exists():
+            return candidate
+    return None
+
+
+@st.cache_data(show_spinner=False)
+def load_prediction_metadata(run_path_str: str) -> dict[str, float | int | str]:
+    """Describe the prediction source file of a run without loading its rows.
+
+    Args:
+        run_path_str: Run directory as a string.
+
+    Returns:
+        A dict with ``row_count``, ``row_groups``, ``file_size_mb`` and ``source_kind``
+        (``"parquet"``, ``"csv"`` or ``"missing"``). Row and row-group counts come from the
+        parquet footer; for CSV and missing sources they are reported as 0.
+    """
+    future_path = _prediction_source_path(Path(run_path_str))
+    if future_path is None:
+        return {"row_count": 0, "row_groups": 0, "file_size_mb": 0.0, "source_kind": "missing"}
+
+    file_size_mb = future_path.stat().st_size / (1024 * 1024)
+    if future_path.suffix == ".parquet":
+        parquet_meta = pq.ParquetFile(future_path).metadata
+        return {
+            "row_count": int(parquet_meta.num_rows),
+            "row_groups": int(parquet_meta.num_row_groups),
+            "file_size_mb": file_size_mb,
+            "source_kind": "parquet",
+        }
+    # CSV rows are not counted here.
+    return {
+        "row_count": 0,
+        "row_groups": 0,
+        "file_size_mb": file_size_mb,
+        "source_kind": "csv",
+    }
+
+
+def get_prediction_cache_dir(run_path: Path) -> Path:
+    """Return the prediction summary cache directory inside *run_path*."""
+    return run_path.joinpath(APP_CACHE_ROOT, ARTIFACT_DIRNAME)
+
+
+def get_prediction_manifest_path(run_path: Path) -> Path:
+    """Return the path of ``manifest.json`` in the run's prediction cache directory."""
+    cache_dir = get_prediction_cache_dir(run_path)
+    return cache_dir.joinpath("manifest.json")
+
+
+def get_prediction_table_path(run_path: Path, table_name: str) -> Path:
+    """Return the parquet path of the cached summary table *table_name* for a run."""
+    cache_dir = get_prediction_cache_dir(run_path)
+    return cache_dir.joinpath(f"{table_name}.parquet")
+
+
+def load_prediction_artifact_manifest(run_path: Path) -> dict[str, object] | None:
+    """Load the cache manifest of a run.
+
+    Returns:
+        The parsed manifest dict, or ``None`` when the file is missing, unreadable,
+        not valid JSON, or does not contain a JSON object.
+    """
+    manifest_path = get_prediction_manifest_path(run_path)
+    try:
+        payload = json.loads(manifest_path.read_text(encoding="utf-8"))
+    except (OSError, ValueError):
+        # OSError covers a missing or unreadable file; ValueError covers both invalid JSON
+        # (JSONDecodeError) and undecodable bytes (UnicodeDecodeError).
+        return None
+    # Callers use ``manifest.get(...)``, so any other JSON value counts as "no manifest".
+    return payload if isinstance(payload, dict) else None
+
+
+def prediction_artifacts_ready(run_path: Path) -> bool:
+    """Tell whether the cached summary tables of a run can be used as-is.
+
+    The cache is ready when a manifest and a source file exist, the manifest matches
+    ``PREDICTION_CACHE_VERSION`` and the source file's current ``st_mtime_ns``, and every
+    table named in ``ARTIFACT_TABLES`` is on disk.
+    """
+    manifest = load_prediction_artifact_manifest(run_path)
+    source = _prediction_source_path(run_path)
+    if manifest is None or source is None or not source.exists():
+        return False
+
+    up_to_date = (
+        manifest.get("cache_version") == PREDICTION_CACHE_VERSION
+        and manifest.get("future_mtime_ns") == source.stat().st_mtime_ns
+    )
+    if not up_to_date:
+        return False
+
+    for table_name in ARTIFACT_TABLES:
+        if not get_prediction_table_path(run_path, table_name).exists():
+            return False
+    return True
+
+
+def _noop_progress(_: float, __: str) -> None:
+    """Progress callback that ignores both the fraction and the message."""
+
+
+def save_prediction_artifacts(
+    run_path: Path,
+    artifacts: dict[str, pd.DataFrame],
+    progress_callback: Callable[[float, str], None] | None = None,
+) -> None:
+    """Write the summary tables and a freshness manifest into the run's cache directory.
+
+    Args:
+        run_path: Run directory that owns the cache.
+        artifacts: Tables keyed by name; every name in ``ARTIFACT_TABLES`` is written.
+        progress_callback: Optional ``(fraction, message)`` reporter; fractions 0.88..1.0
+            are used here.
+
+    Raises:
+        FileNotFoundError: If the run has no ``future.csv`` / ``future.parquet``.
+        KeyError: If *artifacts* lacks one of the ``ARTIFACT_TABLES``.
+    """
+    report = progress_callback or _noop_progress
+    # Resolve the source before writing anything: the manifest needs its mtime, and a
+    # missing source should fail with a clear error instead of an AttributeError on None
+    # after the tables have already been written.
+    source_path = _prediction_source_path(run_path)
+    if source_path is None:
+        raise FileNotFoundError(f"No future.parquet or future.csv found in {run_path}")
+
+    cache_dir = get_prediction_cache_dir(run_path)
+    cache_dir.mkdir(parents=True, exist_ok=True)
+    total_tables = max(len(ARTIFACT_TABLES), 1)
+    for idx, name in enumerate(ARTIFACT_TABLES, start=1):
+        report(0.88 + (0.09 * idx / total_tables), f"Saving `{name}` summary...")
+        artifacts[name].to_parquet(get_prediction_table_path(run_path, name), index=False)
+
+    # The manifest is written last and records the version and source mtime that
+    # ``prediction_artifacts_ready`` compares against.
+    manifest = {
+        "cache_version": PREDICTION_CACHE_VERSION,
+        "future_mtime_ns": source_path.stat().st_mtime_ns,
+        "table_names": ARTIFACT_TABLES,
+    }
+    get_prediction_manifest_path(run_path).write_text(json.dumps(manifest, indent=2), encoding="utf-8")
+    report(1.0, "Prediction summary cache is ready.")
+
+
+@st.cache_data(show_spinner=False)
+def load_saved_prediction_artifacts(run_path_str: str) -> dict[str, pd.DataFrame]:
+    """Read every cached summary table of a run from its parquet file.
+
+    Returns a dict keyed by the names in ``ARTIFACT_TABLES``.
+    """
+    run_root = Path(run_path_str)
+    return {
+        table_name: pd.read_parquet(get_prediction_table_path(run_root, table_name))
+        for table_name in ARTIFACT_TABLES
+    }
+
+
+def _build_prediction_eval_artifacts_impl(
+    run_path_str: str,
+    progress_callback: Callable[[float, str], None] | None = None,
+) -> dict[str, pd.DataFrame]:
+    """Load a run's prediction source and compute the summary tables.
+
+    Args:
+        run_path_str: Run directory as a string.
+        progress_callback: Optional ``(fraction, message)`` reporter. Fractions reported
+            here never decrease: 0.05 (read), 0.18 (topic filter), 0.30..0.84
+            (aggregation), 0.84/0.85 (finalise).
+
+    Returns:
+        The tables produced by ``build_specsheet_aligned_prediction_artifacts``.
+
+    Raises:
+        FileNotFoundError: If the run has neither ``future.csv`` nor ``future.parquet``.
+    """
+    report = progress_callback or _noop_progress
+    run_path = Path(run_path_str)
+    future_path = _prediction_source_path(run_path)
+    if future_path is None:
+        raise FileNotFoundError(f"No future.parquet or future.csv found in {run_path}")
+    report(0.05, f"Reading `{future_path.name}`...")
+    future_cols = [
+        "source",
+        "label",
+        "x",
+        "y",
+        "tx",
+        "ty",
+        "mode",
+        "future_index",
+        "relative_time",
+        "pair_uuid",
+        "frame_index",
+        "scenario_name",
+        "suite_name",
+        "uuid",
+        "confidence",
+    ]
+    if future_path.suffix == ".parquet":
+        # ``topic_name`` is optional: request it only when the file actually has it.
+        schema = pq.read_schema(future_path).names
+        optional_cols = [c for c in ["topic_name"] if c in schema]
+        future_df = pd.read_parquet(future_path, columns=future_cols + optional_cols)
+    else:
+        # Build the lookup once; the callable is invoked for every CSV column.
+        wanted_cols = set(future_cols + ["topic_name"])
+        future_df = pd.read_csv(future_path, usecols=lambda c: c in wanted_cols)
+    if "topic_name" in future_df.columns:
+        report(0.18, "Filtering the default prediction topic...")
+        topic_values = future_df["topic_name"].dropna().astype(str).unique().tolist()
+        # Filter only when the default topic is present; otherwise keep all rows.
+        if DEFAULT_TOPIC in topic_values:
+            future_df = future_df[future_df["topic_name"].astype(str) == DEFAULT_TOPIC].copy()
+
+    # One marker before the long-running call. The later stages are reported by the
+    # builder through ``report_aggregate_progress``, which maps its 0..1 range onto
+    # 0.30..0.84; announcing them up front made the bar jump to 62 % and then back.
+    report(0.3, "Matching prediction tracks against GT...")
+
+    def report_aggregate_progress(inner_fraction: float, message: str) -> None:
+        report(0.3 + (0.54 * inner_fraction), message)
+
+    artifacts = build_specsheet_aligned_prediction_artifacts(
+        future_df,
+        checkpoints=CHECKPOINTS,
+        time_step=0.1,
+        max_error_m=100.0,
+        progress_callback=report_aggregate_progress,
+    )
+    if artifacts["label_summary"].empty:
+        report(0.85, "No matched tracks were found. Creating empty summary tables...")
+    else:
+        report(0.84, "Finalizing overall summary row...")
+    return artifacts
+
+
+@st.cache_data(show_spinner=False)
+def build_prediction_eval_artifacts(run_path_str: str) -> dict[str, pd.DataFrame]:
+    """Cached variant of the artifact build for *run_path_str*, without progress reporting."""
+    artifacts = _build_prediction_eval_artifacts_impl(run_path_str)
+    return artifacts
+
+
+def build_prediction_artifacts_with_progress(run_path: Path, build_label: str) -> None:
+    """Build and save the summary cache for a run while showing a progress bar.
+
+    After saving, all ``st.cache_data`` entries are cleared and the script is rerun.
+    """
+    bar_placeholder = st.empty()
+    caption_placeholder = st.empty()
+    bar = bar_placeholder.progress(0, text=f"Starting {build_label} prediction summary build...")
+
+    def report(fraction: float, message: str) -> None:
+        # Clamp to 0..1 before converting to the integer percentage.
+        clamped = max(0.0, min(1.0, float(fraction)))
+        percent = int(round(clamped * 100))
+        bar.progress(percent, text=message)
+        caption_placeholder.caption(f"{build_label}: {message}")
+
+    built = _build_prediction_eval_artifacts_impl(str(run_path), progress_callback=report)
+    save_prediction_artifacts(run_path, built, progress_callback=report)
+    st.cache_data.clear()
+    st.rerun()
+
+
+def merge_label_compare(label_a: pd.DataFrame, label_b: pd.DataFrame) -> pd.DataFrame:
+    """Outer-join two per-label summaries on ``label`` and add ``<metric>_delta`` columns.
+
+    Columns shared by both inputs get ``_A`` / ``_B`` suffixes; each delta is ``B - A``
+    for the metrics in ``METRIC_ORDER``.
+    """
+    combined = pd.merge(label_a, label_b, on="label", how="outer", suffixes=("_A", "_B"))
+    for metric in METRIC_ORDER:
+        candidate = combined[f"{metric}_B"]
+        baseline = combined[f"{metric}_A"]
+        combined[f"{metric}_delta"] = candidate - baseline
+    return combined
+
+
+def merge_distance_compare(distance_a: pd.DataFrame, distance_b: pd.DataFrame) -> tuple[pd.DataFrame, pd.DataFrame]:
+    """Combine two distance summaries for side-by-side and delta views.
+
+    Returns:
+        ``(both, delta)``. ``both`` stacks the rows of A then B with a ``run`` column of
+        ``"A"`` / ``"B"``. ``delta`` is the outer join on ``label``, ``metric`` and ``r``
+        with ``value_delta = value_B - value_A``. The inputs are not modified.
+    """
+    stacked = pd.concat(
+        [distance_a.assign(run="A"), distance_b.assign(run="B")],
+        ignore_index=True,
+    )
+    joined = pd.merge(
+        distance_a,
+        distance_b,
+        on=["label", "metric", "r"],
+        how="outer",
+        suffixes=("_A", "_B"),
+    )
+    joined["value_delta"] = joined["value_B"] - joined["value_A"]
+    return stacked, joined
+
+
+def merge_polar_compare(polar_a: pd.DataFrame, polar_b: pd.DataFrame) -> pd.DataFrame:
+    """Outer-join two polar summaries and add ``value_delta = value_B - value_A``.
+
+    Rows are matched on ``label``, ``metric``, ``r`` and ``theta``.
+    """
+    join_keys = ["label", "metric", "r", "theta"]
+    joined = pd.merge(polar_a, polar_b, on=join_keys, how="outer", suffixes=("_A", "_B"))
+    joined["value_delta"] = joined["value_B"].sub(joined["value_A"])
+    return joined
+
+
+# Only runs that contain a prediction source file are selectable on this page.
+run_dirs = list_run_directories()
+run_dirs = [p for p in run_dirs if _run_has_prediction_source(p)]
+run_names = [get_run_display_name(p) for p in run_dirs]
+if not run_names:
+    st.warning("No run directories with `future.parquet` or `future.csv` found under `data/`.")
+    st.stop()
+
+try_hydrate_session_from_overview_query_params()
+mode_default = "Compare Mode" if st.session_state.get("mode") == "Compare Mode" else "Single Run"
+mode = st.sidebar.selectbox("Mode", ["Single Run", "Compare Mode"], index=0 if mode_default == "Single Run" else 1)
+
+# Preselect the run stored in the session when it is a Path and is selectable here;
+# otherwise fall back to the first run.
+session_run_path = st.session_state.get("runA", {}).get("path") if st.session_state.get("runA") else None
+default_run_name = get_run_display_name(session_run_path) if isinstance(session_run_path, Path) else run_names[0]
+if default_run_name not in run_names:
+    default_run_name = run_names[0]
+
+selected_run_a = st.sidebar.selectbox(
+    "Baseline (A)" if mode == "Compare Mode" else "Run",
+    run_names,
+    index=run_names.index(default_run_name),
+    help="Select a run directory containing `future.parquet` or `future.csv`.",
+)
+selected_run_b = None
+if mode == "Compare Mode":
+    # Offer every run except A; with a single run available, A itself stays selectable.
+    compare_candidates = [n for n in run_names if n != selected_run_a] or run_names
+    # Resolve the stored candidate the same way as run A: the options are display names,
+    # and the session entry may lack a usable ``Path`` under "path".
+    session_run_b_path = st.session_state.get("runB", {}).get("path") if st.session_state.get("runB") else None
+    default_b = (
+        get_run_display_name(session_run_b_path)
+        if isinstance(session_run_b_path, Path)
+        else compare_candidates[0]
+    )
+    if default_b not in compare_candidates:
+        default_b = compare_candidates[0]
+    selected_run_b = st.sidebar.selectbox("Candidate (B)", compare_candidates, index=compare_candidates.index(default_b))
+
+# Map the selected display names back to run directories; B stays None in single-run mode.
+run_path_a = next(p for p in run_dirs if get_run_display_name(p) == selected_run_a)
+run_path_b = next((p for p in run_dirs if get_run_display_name(p) == selected_run_b), None)
+metadata_a = load_prediction_metadata(str(run_path_a))
+cache_ready_a = prediction_artifacts_ready(run_path_a)
+metadata_b = load_prediction_metadata(str(run_path_b)) if run_path_b is not None else None
+cache_ready_b = prediction_artifacts_ready(run_path_b) if run_path_b is not None else False
+
+if mode == "Compare Mode" and run_path_b is not None:  # compare view lists both run paths
+    render_loaded_data_section(
+        [
+            ("Baseline · A", path_display(run_path_a)),
+            ("Candidate · B", path_display(run_path_b)),
+        ]
+    )
+else:  # single run, or compare mode without a resolved B run
+    render_loaded_data_section([("Prediction run", path_display(run_path_a))])
+render_page_hero(
+    kicker="Prediction quality",
+    title="Prediction evaluation",
+    description=(
+        "ADE/FDE summaries from `future.parquet`, computed from the cached prediction summary artifacts "
+        "and presented as interactive cards, ladders, and polar maps."
+    ),
+    mode=mode,
+    secondary_badge_inner_html="Prediction cache",
+)
+st.markdown(  # chip row: row count, source size/kind and cache state for A, plus the same for B when its metadata is set
+    f"""
+    <div class="pred-chip-row">
+      <div class="pred-chip">A: {int(metadata_a['row_count']):,} future rows</div>
+      <div class="pred-chip">A: {metadata_a['file_size_mb']:.1f} MB {metadata_a['source_kind']}</div>
+      <div class="pred-chip">A cache: {'ready' if cache_ready_a else 'not built'}</div>
+      {f'<div class="pred-chip">B: {int(metadata_b["row_count"]):,} future rows</div>' if metadata_b else ''}
+      {f'<div class="pred-chip">B: {metadata_b["file_size_mb"]:.1f} MB {metadata_b["source_kind"]}</div>' if metadata_b else ''}
+      {f'<div class="pred-chip">B cache: {"ready" if cache_ready_b else "not built"}</div>' if metadata_b else ''}
+    </div>
+    """,
+    unsafe_allow_html=True,  # the chip row is raw <div> markup, so HTML rendering is enabled for this call
+)
+
+build_col, info_col = st.columns([0.34, 0.66])
+with build_col:
+    build_clicked_a = st.button("Build A Summary", type="primary", use_container_width=True)
+    build_clicked_b = st.button("Build B Summary", use_container_width=True) if mode == "Compare Mode" and run_path_b is not None else False
+with info_col:
+    if mode == "Compare Mode":
+        status_lines = [
+            f"A `{selected_run_a}`: {'ready' if cache_ready_a else 'not built'}",
+            f"B `{selected_run_b}`: {'ready' if cache_ready_b else 'not built'}" if selected_run_b else "",
+        ]
+        if cache_ready_a and cache_ready_b:
+            st.success("Compare result is ready. Both cached summaries are available.")
+        else:
+            pending = (("Build A Summary", cache_ready_a), ("Build B Summary", cache_ready_b))  # (button label, cache ready?)
+            needed = [button for button, ready in pending if not ready]
+            st.info("Compare mode status:\n\n" + "\n\n".join([x for x in status_lines if x]) + f"\n\nNext step: press {' and '.join(needed)}.")
+    elif cache_ready_a:
+        st.success("Compact ADE/FDE summary tables are available for fast loading.")
+    else:
+        st.info(f"Run `{selected_run_a}` is not cached yet. Press Build A Summary to generate the result.")
+
+if build_clicked_a:
+    build_prediction_artifacts_with_progress(run_path_a, "A")
+if build_clicked_b and run_path_b is not None:
+    build_prediction_artifacts_with_progress(run_path_b, "B")
+if build_clicked_a or build_clicked_b:
+    # The flags were read before the build ran; refresh both so a finished build passes the gate below.
+    cache_ready_a = prediction_artifacts_ready(run_path_a)
+    cache_ready_b = prediction_artifacts_ready(run_path_b) if run_path_b is not None else False
+
+if (mode == "Single Run" and not cache_ready_a) or (mode == "Compare Mode" and (not cache_ready_a or not cache_ready_b)):
+    section_header(
+        "Build Once, Open Fast",
+        "This page now stays responsive by loading only precomputed ADE/FDE summaries instead of processing the full future parquet on navigation.",
+    )
+    st.stop()
+
+# Load the cached summary tables for run A (label, distance, polar), plus the same tables
+# for run B when compare mode resolved a second run. The A tables are copied before use.
+# All names assigned here are module-level and are read by the sections further down.
+artifacts_a = load_saved_prediction_artifacts(str(run_path_a))
+label_summary = artifacts_a["label_summary"].copy()
+distance_summary = artifacts_a["distance_summary"].copy()
+polar_summary = artifacts_a["polar_summary"].copy()
+artifacts_b = load_saved_prediction_artifacts(str(run_path_b)) if mode == "Compare Mode" and run_path_b is not None else None
+
+# The cards and label sections are driven by the label summary, so stop when it has no rows.
+if label_summary.empty:
+    st.warning("No prediction summary data is available for this run.")
+    st.stop()
+
+# Per-label sections iterate these; the "All" aggregate row is handled separately.
+available_labels = [x for x in label_summary["label"].astype(str).tolist() if x != "All"]
+
+# Headline numbers come from the "All" row; fall back to the first row when it is absent.
+overall_row = label_summary[label_summary["label"].astype(str) == "All"]
+if overall_row.empty:
+    overall_row = label_summary.head(1)
+overall = overall_row.iloc[0]
+
+# Compare-only frames stay None in single-run mode; the sections below branch on that.
+compare_label = distance_both = distance_delta = polar_delta = overall_cmp = None
+if artifacts_b is not None:
+    compare_label = merge_label_compare(label_summary, artifacts_b["label_summary"])
+    distance_both, distance_delta = merge_distance_compare(distance_summary, artifacts_b["distance_summary"])
+    polar_delta = merge_polar_compare(polar_summary, artifacts_b["polar_summary"])
+    # Same fallback as the single-run row above, so a merged frame without an "All" row
+    # yields a headline row instead of raising IndexError on .iloc[0].
+    overall_cmp_rows = compare_label[compare_label["label"] == "All"]
+    if overall_cmp_rows.empty:
+        overall_cmp_rows = compare_label.head(1)
+    overall_cmp = overall_cmp_rows.iloc[0]
+
+section_header(
+    "At A Glance",
+    "These cards mirror the kind of abstract specsheet readout we need in product review, but in a faster dashboard form.",
+)
+# (metric, description) for each headline card in display order, rendered three per row.
+# Every card title carries the same "<= 60m" suffix.
+glance_cards = [
+    ("minADE@1s", "Best-of-K average displacement error within the near operating zone."),
+    ("minADE@3s", "Mid-horizon shape fidelity aligned with the specsheet future metric."),
+    ("minFDE@3s", "Where the endpoint lands matters most in review discussions, so this gets prime placement."),
+    ("minADE@5s", "Longer horizon path quality, still scoped to the near-range summary window."),
+    ("minFDE@1s", "Short horizon endpoint stability."),
+    ("minFDE@5s", "Longest specsheet-style endpoint metric."),
+]
+
+for row_start in range(0, len(glance_cards), 3):
+    row_specs = glance_cards[row_start : row_start + 3]
+    for card_col, (card_metric, blurb) in zip(st.columns(3), row_specs):
+        with card_col:
+            card_title = f"{card_metric} <= 60m"
+            if overall_cmp is not None:
+                # Compare mode: the card receives the A and B values from the merged headline row.
+                render_compare_stat_card(card_title, overall_cmp[f"{card_metric}_A"], overall_cmp[f"{card_metric}_B"], blurb)
+            else:
+                # Only the last single-run card also reports how many source rows fed the summary.
+                if card_metric == "minFDE@5s" and pd.notna(overall["future_rows"]):
+                    blurb = f"{blurb} Source rows processed: {int(overall['future_rows']):,}."
+                # A missing metric renders as "n/a" rather than a formatted number.
+                card_value = f"{overall[card_metric]:.2f} m" if pd.notna(overall[card_metric]) else "n/a"
+                render_stat_card(card_title, card_value, blurb)
+
+section_header(
+    "Label Performance",
+    "All labels are shown together so you can compare actor classes without touching filters.",
+)
+label_view = label_summary[label_summary["label"].isin(available_labels)].copy()  # per-label rows; available_labels excludes "All"
+if compare_label is not None:  # compare mode: heatmap of the *_delta columns
+    cmp_view = compare_label[compare_label["label"].isin(available_labels)].copy()
+    delta_long = cmp_view.melt(
+        id_vars=["label"],
+        value_vars=[f"{m}_delta" for m in METRIC_ORDER],
+        var_name="metric",
+        value_name="value",
+    )
+    delta_long["metric"] = delta_long["metric"].str.replace("_delta", "", regex=False)  # strip the suffix so names match METRIC_ORDER
+    heat = delta_long.pivot(index="label", columns="metric", values="value").reindex(columns=METRIC_ORDER)  # labels as rows, metrics as columns
+    fig = go.Figure(
+        data=go.Heatmap(
+            z=heat.values,
+            x=list(heat.columns),
+            y=list(heat.index),
+            colorscale="RdBu",
+            zmid=0,  # midpoint of the color scale sits at a delta of zero
+            text=[[f"{v:+.2f}" if pd.notna(v) else "-" for v in row] for row in heat.values],  # signed cell labels, "-" for missing
+            texttemplate="%{text}",
+            hovertemplate="label=%{y}<br>metric=%{x}<br>Δ=%{z:+.3f} m<extra></extra>",
+        )
+    )
+    fig.update_layout(
+        title="ADE/FDE delta matrix: B - A within <= 60m",
+        xaxis_title="Metric",
+        yaxis_title="Label",
+        height=max(360, 70 * len(heat.index)),  # grows with the number of labels, never below 360
+        margin=dict(l=10, r=10, t=55, b=10),
+    )
+    st.plotly_chart(fig, width="stretch")
+elif not label_view.empty:  # single run: heatmap of the raw metric values
+    label_long = label_view.melt(
+        id_vars=["label"],
+        value_vars=METRIC_ORDER,
+        var_name="metric",
+        value_name="value",
+    )
+    heat = label_long.pivot(index="label", columns="metric", values="value").reindex(columns=METRIC_ORDER)  # labels as rows, metrics as columns
+    fig = go.Figure(
+        data=go.Heatmap(
+            z=heat.values,
+            x=list(heat.columns),
+            y=list(heat.index),
+            colorscale="YlOrRd",
+            text=[[f"{v:.2f}" if pd.notna(v) else "-" for v in row] for row in heat.values],  # two-decimal cell labels, "-" for missing
+            texttemplate="%{text}",
+            hovertemplate="label=%{y}<br>metric=%{x}<br>value=%{z:.3f} m<extra></extra>",
+        )
+    )
+    fig.update_layout(
+        title="ADE/FDE matrix within <= 60m",
+        xaxis_title="Metric",
+        yaxis_title="Label",
+        height=max(360, 70 * len(heat.index)),  # grows with the number of labels, never below 360
+        margin=dict(l=10, r=10, t=55, b=10),
+    )
+    st.plotly_chart(fig, width="stretch")
+
+section_header(
+    "Distance Ladder",
+    "Compare mode defaults to clearer views than a 14-line overlay: delta heatmaps, label small multiples, and the original raw lines only as a fallback.",
+)
+distance_view = distance_both if distance_both is not None else distance_summary[distance_summary["label"].isin(available_labels)].copy()  # compare: A/B frame as-is; single run: rows for available_labels only
+if distance_both is not None and not distance_view.empty:  # compare-mode views
+    compare_tabs = st.tabs(["Delta Heatmap", "Label Small Multiples", "Raw Lines"])
+    with compare_tabs[0]:  # per-metric heatmaps of value_delta, three per row
+        for start in range(0, len(METRIC_ORDER), 3):
+            metric_chunk = METRIC_ORDER[start : start + 3]
+            cols = st.columns(len(metric_chunk))
+            for col, metric_name in zip(cols, metric_chunk):
+                with col:
+                    metric_delta = distance_delta[
+                        (distance_delta["metric"] == metric_name)
+                        & (distance_delta["label"].isin(available_labels))
+                    ].copy()
+                    if metric_delta.empty:
+                        st.caption(f"{metric_name}: no data")
+                        continue  # this column only gets the caption
+                    col_order = ordered_distance_bins(metric_delta["r"].tolist())
+                    pivot = (
+                        metric_delta.pivot(index="label", columns="r", values="value_delta")
+                        .reindex(index=available_labels)  # one row per label, even when a label has no rows here
+                        .reindex(columns=col_order)
+                    )
+                    fig = go.Figure(
+                        data=go.Heatmap(
+                            z=pivot.values,
+                            x=[str(v) for v in pivot.columns],
+                            y=[str(v) for v in pivot.index],
+                            colorscale="RdBu",
+                            zmid=0,  # midpoint of the color scale sits at a delta of zero
+                            text=[[f"{v:+.2f}" if pd.notna(v) else "-" for v in row] for row in pivot.values],
+                            texttemplate="%{text}",
+                            hovertemplate="label=%{y}<br>r=%{x}<br>Δ=%{z:+.3f} m<extra></extra>",
+                        )
+                    )
+                    fig.update_layout(
+                        title=metric_name,
+                        xaxis_title="Radius bin",
+                        yaxis_title="Label",
+                        height=max(320, 54 * len(available_labels)),
+                        margin=dict(l=10, r=10, t=45, b=10),
+                    )
+                    st.plotly_chart(fig, width="stretch", key=f"distance_delta_{metric_name}")
+    with compare_tabs[1]:  # one tab per metric, one small A-vs-B line chart per label
+        metric_tabs = st.tabs(METRIC_ORDER)
+        for metric_name, metric_tab in zip(METRIC_ORDER, metric_tabs):
+            with metric_tab:
+                metric_view = distance_view[
+                    (distance_view["metric"] == metric_name)
+                    & (distance_view["label"].isin(available_labels))
+                ].copy()
+                if metric_view.empty:
+                    st.info(f"No data for {metric_name}.")
+                    continue
+                metric_view["r"] = pd.Categorical(metric_view["r"], categories=ordered_distance_bins(metric_view["r"].tolist()), ordered=True)  # r becomes an ordered categorical
+                for start in range(0, len(available_labels), 3):
+                    chunk = available_labels[start : start + 3]
+                    cols = st.columns(len(chunk))
+                    for col, label_name in zip(cols, chunk):
+                        with col:
+                            label_df = metric_view[metric_view["label"] == label_name].copy()
+                            if label_df.empty:
+                                st.caption(f"{label_name}: no data")
+                                continue
+                            fig = px.line(
+                                label_df,
+                                x="r",
+                                y="value",
+                                color="run",
+                                markers=True,
+                                labels={"r": "Radius bin", "value": "Error (m)", "run": "Run"},
+                                title=label_name,
+                                color_discrete_map={"A": PLOTLY_COLORS["ink"], "B": PLOTLY_COLORS["amber"]},
+                            )
+                            fig.update_layout(height=280, margin=dict(l=10, r=10, t=45, b=10), legend_title="Run")
+                            st.plotly_chart(fig, width="stretch", key=f"distance_small_{metric_name}_{label_name}")
+    with compare_tabs[2]:  # all labels overlaid per metric facet; line dash separates the runs
+        fig = px.line(
+            distance_view[distance_view["label"].isin(available_labels)],
+            x="r",
+            y="value",
+            color="label",
+            line_dash="run",
+            markers=True,
+            facet_col="metric",
+            facet_col_wrap=3,
+            category_orders={"r": ordered_distance_bins(distance_view["r"].tolist())},
+            labels={"r": "Radius bin (m)", "value": "Error (m)", "label": "Label", "run": "Run"},
+            title="ADE/FDE by distance bin: A vs B",
+            color_discrete_sequence=[
+                PLOTLY_COLORS["ink"],
+                PLOTLY_COLORS["blue"],
+                PLOTLY_COLORS["teal"],
+                PLOTLY_COLORS["amber"],
+                PLOTLY_COLORS["rose"],
+                PLOTLY_COLORS["slate"],
+                "#8b5cf6",
+            ],
+        )
+        fig.update_layout(height=760, margin=dict(l=10, r=10, t=55, b=10), legend_title="Label / Run")
+        fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))  # keep only the text after "=" in each annotation
+        st.plotly_chart(fig, width="stretch", key="distance_raw_compare")
+elif not distance_view.empty:  # single-run views
+    single_tabs = st.tabs(["Lines", "Metric Heatmaps", "Circular Rings", "Label Small Multiples"])
+    with single_tabs[0]:  # all labels overlaid, one facet per metric
+        fig = px.line(
+            distance_view,
+            x="r",
+            y="value",
+            color="label",
+            markers=True,
+            facet_col="metric",
+            facet_col_wrap=3,
+            category_orders={"r": ordered_distance_bins(distance_view["r"].tolist())},
+            labels={"r": "Radius bin (m)", "value": "Error (m)", "label": "Label"},
+            title="ADE/FDE by distance bin",
+            color_discrete_sequence=[
+                PLOTLY_COLORS["ink"],
+                PLOTLY_COLORS["blue"],
+                PLOTLY_COLORS["teal"],
+                PLOTLY_COLORS["amber"],
+                PLOTLY_COLORS["rose"],
+                PLOTLY_COLORS["slate"],
+                "#8b5cf6",
+            ],
+        )
+        fig.update_layout(height=760, margin=dict(l=10, r=10, t=55, b=10), legend_title="Label")
+        fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))  # keep only the text after "=" in each annotation
+        st.plotly_chart(fig, width="stretch", key="distance_single_lines")
+    with single_tabs[1]:  # label x radius-bin heatmap per metric, three per row
+        for start in range(0, len(METRIC_ORDER), 3):
+            metric_chunk = METRIC_ORDER[start : start + 3]
+            cols = st.columns(len(metric_chunk))
+            for col, metric_name in zip(cols, metric_chunk):
+                with col:
+                    metric_df = distance_view[
+                        (distance_view["metric"] == metric_name)
+                        & (distance_view["label"].isin(available_labels))
+                    ].copy()
+                    if metric_df.empty:
+                        st.caption(f"{metric_name}: no data")
+                        continue
+                    col_order = ordered_distance_bins(metric_df["r"].tolist())
+                    pivot = (
+                        metric_df.pivot(index="label", columns="r", values="value")
+                        .reindex(index=available_labels)  # one row per label, even when a label has no rows here
+                        .reindex(columns=col_order)
+                    )
+                    fig = go.Figure(
+                        data=go.Heatmap(
+                            z=pivot.values,
+                            x=[str(v) for v in pivot.columns],
+                            y=[str(v) for v in pivot.index],
+                            colorscale="YlOrRd",
+                            text=[[f"{v:.2f}" if pd.notna(v) else "-" for v in row] for row in pivot.values],
+                            texttemplate="%{text}",
+                            hovertemplate="label=%{y}<br>r=%{x}<br>value=%{z:.3f} m<extra></extra>",
+                        )
+                    )
+                    fig.update_layout(
+                        title=metric_name,
+                        xaxis_title="Radius bin",
+                        yaxis_title="Label",
+                        height=max(320, 54 * len(available_labels)),
+                        margin=dict(l=10, r=10, t=45, b=10),
+                    )
+                    st.plotly_chart(fig, width="stretch", key=f"distance_single_heat_{metric_name}")
+    with single_tabs[2]:  # figures from build_distance_ring_figure, two per row
+        for start in range(0, len(METRIC_ORDER), 2):
+            metric_chunk = METRIC_ORDER[start : start + 2]
+            cols = st.columns(len(metric_chunk))
+            for col, metric_name in zip(cols, metric_chunk):
+                with col:
+                    metric_df = distance_view[
+                        (distance_view["metric"] == metric_name)
+                        & (distance_view["label"].isin(available_labels))
+                    ].copy()
+                    if metric_df.empty:
+                        st.caption(f"{metric_name}: no data")
+                        continue
+                    fig = build_distance_ring_figure(metric_df, available_labels, metric_name)
+                    st.plotly_chart(fig, width="stretch", key=f"distance_single_ring_{metric_name}")
+    with single_tabs[3]:  # one tab per metric, one small line chart per label
+        metric_tabs = st.tabs(METRIC_ORDER)
+        for metric_name, metric_tab in zip(METRIC_ORDER, metric_tabs):
+            with metric_tab:
+                metric_df = distance_view[
+                    (distance_view["metric"] == metric_name)
+                    & (distance_view["label"].isin(available_labels))
+                ].copy()
+                if metric_df.empty:
+                    st.info(f"No data for {metric_name}.")
+                    continue
+                metric_df["r"] = pd.Categorical(metric_df["r"], categories=ordered_distance_bins(metric_df["r"].tolist()), ordered=True)  # r becomes an ordered categorical
+                for start in range(0, len(available_labels), 3):
+                    chunk = available_labels[start : start + 3]
+                    cols = st.columns(len(chunk))
+                    for col, label_name in zip(cols, chunk):
+                        with col:
+                            label_df = metric_df[metric_df["label"] == label_name].copy()
+                            if label_df.empty:
+                                st.caption(f"{label_name}: no data")
+                                continue
+                            fig = px.line(
+                                label_df,
+                                x="r",
+                                y="value",
+                                markers=True,
+                                title=label_name,
+                                labels={"r": "Radius bin", "value": "Error (m)"},
+                                color_discrete_sequence=[PLOTLY_COLORS["blue"]],
+                            )
+                            fig.update_layout(height=280, margin=dict(l=10, r=10, t=45, b=10), showlegend=False)
+                            st.plotly_chart(fig, width="stretch", key=f"distance_single_small_{metric_name}_{label_name}")
+
+section_header(
+    "Polar Field",
+    "Each tab is one metric, and every label gets its own heatmap. That keeps the page filter-free while still easy to scan.",
+)
+polar_view_tabs = st.tabs(["Heatmap", "Circular"])
+for view_name, outer_tab in zip(["heatmap", "circular"], polar_view_tabs):  # both views walk the same metric/label grid
+    with outer_tab:
+        metric_tabs = st.tabs(METRIC_ORDER)
+        for metric_name, metric_tab in zip(METRIC_ORDER, metric_tabs):
+            with metric_tab:
+                metric_polar = polar_delta[polar_delta["metric"] == metric_name].copy() if polar_delta is not None else polar_summary[polar_summary["metric"] == metric_name].copy()  # compare: delta frame; single run: raw summary
+                value_col = "value_delta" if polar_delta is not None else "value"
+                if metric_polar.empty:
+                    st.info(f"No data for {metric_name}.")
+                    continue
+                for start in range(0, len(available_labels), 3):  # three labels per row
+                    chunk = available_labels[start : start + 3]
+                    cols = st.columns(len(chunk))
+                    for col, label_name in zip(cols, chunk):
+                        with col:
+                            label_polar = metric_polar[metric_polar["label"] == label_name].copy()
+                            if label_polar.empty:
+                                st.caption(f"{label_name}: no data")
+                                continue
+                            if view_name == "heatmap":
+                                pivot = (
+                                    label_polar.pivot(index="r", columns="theta", values=value_col)
+                                    .reindex(index=R_LABELS, columns=THETA_LABELS)  # fixed axes taken from R_LABELS / THETA_LABELS
+                                )
+                                fig = go.Figure(
+                                    data=go.Heatmap(
+                                        z=pivot.values,
+                                        x=[str(v) for v in pivot.columns],
+                                        y=[str(v) for v in pivot.index],
+                                        colorscale="RdBu" if polar_delta is not None else "YlOrRd",
+                                        zmid=0 if polar_delta is not None else None,  # zero midpoint only when plotting deltas
+                                        hovertemplate=("theta=%{x}<br>r=%{y}<br>Δ=%{z:+.3f} m<extra></extra>" if polar_delta is not None else "theta=%{x}<br>r=%{y}<br>value=%{z:.3f} m<extra></extra>"),
+                                    )
+                                )
+                                fig.update_layout(
+                                    title=f"{label_name} (B - A)" if polar_delta is not None else label_name,
+                                    xaxis_title="Theta",
+                                    yaxis_title="Radius",
+                                    height=320,
+                                    margin=dict(l=10, r=10, t=45, b=10),
+                                )
+                                st.plotly_chart(fig, width="stretch", key=f"polar_{view_name}_{metric_name}_{label_name}")
+                            else:  # "circular" view: figure comes from build_theta_ring_figure
+                                fig = build_theta_ring_figure(
+                                    label_polar=label_polar,
+                                    metric_name=metric_name,
+                                    label_name=label_name,
+                                    value_col=value_col,
+                                    delta_mode=polar_delta is not None,
+                                )
+                                st.plotly_chart(fig, width="stretch", key=f"polar_{view_name}_{metric_name}_{label_name}")
+
+section_header(
+    "Metric Table",
+    "Exact summary values for the labels in view, aligned with the specsheet future metric definitions.",
+)
+table_cols = ["label", "future_rows"] + METRIC_ORDER
+if compare_label is not None:
+    # Compare mode: A, B and delta columns for every metric, grouped by suffix.
+    suffixed_cols = [f"{m}{suffix}" for suffix in ("_A", "_B", "_delta") for m in METRIC_ORDER]
+    table_df = compare_label[["label"] + suffixed_cols]
+else:
+    table_df = label_view[table_cols] if not label_view.empty else label_summary[table_cols]
+# Derive the column config from the columns actually shown, so the suffixed compare-mode
+# columns get the same metre format as the single-run metric columns.
+table_config = {
+    col: st.column_config.NumberColumn(col, format="%.3f m") for col in table_df.columns if col not in ("label", "future_rows")
+}
+table_config["future_rows"] = st.column_config.NumberColumn("Rows", format="%d")
+st.dataframe(
+    table_df,
+    width="stretch",
+    hide_index=True,
+    column_config=table_config,
+)
diff --git a/evaluation_dashboard_app/pages/13_Trend_Insights.py b/evaluation_dashboard_app/pages/13_Trend_Insights.py
new file mode 100644
index 0000000..f4d5235
--- /dev/null
+++ b/evaluation_dashboard_app/pages/13_Trend_Insights.py
@@ -0,0 +1,2133 @@
+from __future__ import annotations
+
+import json
+import re
+import shutil
+from html import escape
+from pathlib import Path
+from typing import Any
+
+import pandas as pd
+import plotly.express as px
+import plotly.graph_objects as go
+import streamlit as st
+import streamlit.components.v1 as components
+
+from lib.page_chrome import inject_app_page_styles, render_page_hero, section_header
+from lib.path_utils import get_data_root, path_display, resolve_under_data_root
+from lib.release_specsheet_library import discover_release_specsheet_inventory
+from lib.specsheet_report import (
+    DEFAULT_TREND_METADATA_TEXT,
+    TREND_METADATA_FILENAME,
+    TREND_SUMMARY_FILENAME,
+    TrendReleaseGroup,
+    classify_trend_summary,
+    discover_trend_release_groups,
+    extract_devops_case_rows,
+    extract_performance_metrics_from_summary,
+    load_trend_summary_file,
+    parse_trend_metadata_text,
+)
+
+# Page-level setup: configure the Streamlit page (title, wide layout, sidebar expanded on load),
+# then call the style hook imported from lib.page_chrome.
+st.set_page_config(page_title="Trend Insights", layout="wide", initial_sidebar_state="expanded")
+inject_app_page_styles()
+
+
+def _parse_data_count(value: Any) -> int | None:
+    """Parse a data-count value into an integer.
+
+    Accepts integers, integral floats and strings such as ``"1,234"`` or ``"500+"``;
+    thousands separators and ``+`` signs are dropped before parsing.
+
+    Args:
+        value: Raw count taken from metadata or a table cell.
+
+    Returns:
+        The parsed count, or ``None`` when ``value`` is ``None``, a bool, blank, or not
+        an integral number (including NaN and infinity).
+    """
+    # Test for None explicitly: ``value or ""`` would also discard a legitimate count of 0.
+    if value is None or isinstance(value, bool):
+        return None
+    text = str(value).strip().replace(",", "").replace("+", "")
+    if not text:
+        return None
+    try:
+        return int(text)
+    except ValueError:
+        pass
+    # Counts that passed through a float column arrive as e.g. "1234.0"; accept them
+    # when they are whole numbers. NaN and infinity fail is_integer() and give None.
+    try:
+        number = float(text)
+    except ValueError:
+        return None
+    return int(number) if number.is_integer() else None
+
+
+def _select_primary_metadata(group: TrendReleaseGroup) -> dict[str, Any]:
+    """Return the metadata of the highest-priority job role present in ``group``.
+
+    Roles are checked in the order full, usecase, devops, performance_blocks, unknown.
+    An empty dict is returned when none of them is a key of ``group.jobs``.
+    """
+    role_priority = ("full", "usecase", "devops", "performance_blocks", "unknown")
+    available_jobs = group.jobs
+    chosen_role = next((name for name in role_priority if name in available_jobs), None)
+    if chosen_role is None:
+        return {}
+    return available_jobs[chosen_role]["metadata"]
+
+
+def _safe_path_part(value: Any, fallback: str) -> str:
+    """Turn ``value`` into a single safe path component.
+
+    Runs of characters other than word characters, ``.`` and ``-`` collapse into one
+    underscore, and leading/trailing dots and underscores are stripped. ``fallback`` is
+    returned when nothing usable is left.
+    """
+    raw = str(value or "").strip()
+    collapsed = re.sub(r"[^\w.\-]+", "_", raw)
+    cleaned = collapsed.strip("._")
+    if cleaned:
+        return cleaned
+    return fallback
+
+
+def _resolve_summary_json_input(user_path: str) -> tuple[Path | None, str]:
+    """Locate the trend summary file referenced by ``user_path``.
+
+    ``user_path`` is resolved with ``resolve_under_data_root``. It may point directly at
+    a file named ``TREND_SUMMARY_FILENAME``, or at a directory that holds that file
+    either directly or inside a ``resources/`` sub-folder.
+
+    Returns:
+        ``(path, "")`` on success, or ``(None, message)`` describing why no summary
+        file could be found.
+    """
+    resolved, err = resolve_under_data_root(user_path, allow_missing=False)
+    if err:
+        return None, err
+    if resolved is None:
+        # Explicit check rather than ``assert``, which is stripped under ``python -O``.
+        return None, f"Could not resolve path: {user_path}"
+    if resolved.is_file():
+        if resolved.name != TREND_SUMMARY_FILENAME:
+            return None, f"Expected a {TREND_SUMMARY_FILENAME} file: {path_display(resolved)}"
+        return resolved, ""
+    for candidate in (
+        resolved / TREND_SUMMARY_FILENAME,
+        resolved / "resources" / TREND_SUMMARY_FILENAME,
+    ):
+        # is_file() instead of exists(): a directory that merely carries the summary
+        # file's name is not a loadable summary.
+        if candidate.is_file():
+            return candidate, ""
+    return None, f"No {TREND_SUMMARY_FILENAME} found in {path_display(resolved)} or its resources/ folder."
+
+
+def _default_job_id_from_summary(summary_path: Path) -> str:
+    """Derive a job id from the directory that contains ``summary_path``.
+
+    When the summary sits in a ``resources`` folder, the job id is the name of the
+    folder above it; otherwise it is the name of the summary's own parent folder.
+    """
+    container = summary_path.parent
+    job_dir = container.parent if container.name == "resources" else container
+    return job_dir.name
+
+
+def _assemble_trend_release_group(
+    *,
+    release_name: str,
+    topic_name: str,
+    role_sources: dict[str, str],
+    role_job_ids: dict[str, str],
+    metadata: dict[str, Any],
+) -> Path:
+    """Copy per-role trend summaries into one release directory tree.
+
+    The resulting layout is ``<data_root>/<release>/<topic>/<job_id>/`` where each job
+    folder receives a copy of the role's summary file plus a metadata YAML file.
+
+    Args:
+        release_name: Name of the release folder (sanitized before use).
+        topic_name: Name of the topic folder (sanitized before use).
+        role_sources: Maps each role to the user-supplied summary path.
+        role_job_ids: Optional job id override per role; blank entries fall back to
+            the job id derived from the summary's location.
+        metadata: Release metadata written next to every copied summary.
+
+    Returns:
+        The release directory.
+
+    Raises:
+        ValueError: If a source cannot be resolved, a summary is classified as a
+            different role than the one it was supplied for, one of the roles
+            full/usecase/devops is missing, or two roles map to the same job folder.
+    """
+    # Imported before any file is touched so a missing PyYAML cannot leave behind an
+    # empty, half-written metadata file.
+    import yaml
+
+    data_root = get_data_root()
+    release_dir = data_root / _safe_path_part(release_name, "trend_release")
+    topic_dir = release_dir / _safe_path_part(topic_name, "perception.object_recognition.objects")
+    expected_roles = {"full", "usecase", "devops"}
+    seen_roles: dict[str, Path] = {}
+
+    for expected_role, source_text in role_sources.items():
+        summary_path, err = _resolve_summary_json_input(source_text)
+        if err:
+            raise ValueError(f"{expected_role}: {err}")
+        if summary_path is None:
+            raise ValueError(f"{expected_role}: summary path could not be resolved.")
+        summary = load_trend_summary_file(summary_path)
+        actual_role = classify_trend_summary(summary)
+        if actual_role != expected_role:
+            raise ValueError(
+                f"{expected_role}: {path_display(summary_path)} classified as `{actual_role}`, "
+                f"not `{expected_role}`."
+            )
+        seen_roles[actual_role] = summary_path
+
+    missing = sorted(expected_roles - set(seen_roles))
+    if missing:
+        raise ValueError(f"Missing required trend roles: {', '.join(missing)}")
+
+    # Plan every destination before writing anything. Each job folder holds exactly one
+    # summary file, so two roles sharing a folder would silently overwrite each other.
+    job_dirs: dict[str, Path] = {}
+    claimed_by: dict[Path, str] = {}
+    for role, summary_path in seen_roles.items():
+        job_id = _safe_path_part(role_job_ids.get(role) or _default_job_id_from_summary(summary_path), role)
+        job_dir = topic_dir / job_id
+        if job_dir in claimed_by:
+            raise ValueError(
+                f"{role}: job id `{job_id}` is already used by `{claimed_by[job_dir]}`; "
+                "set a distinct job id override."
+            )
+        claimed_by[job_dir] = role
+        job_dirs[role] = job_dir
+
+    # The metadata is identical for every role, so serialize it once.
+    metadata_yaml = yaml.safe_dump(metadata, allow_unicode=True, sort_keys=False)
+    for role, job_dir in job_dirs.items():
+        job_dir.mkdir(parents=True, exist_ok=True)
+        shutil.copy2(seen_roles[role], job_dir / TREND_SUMMARY_FILENAME)
+        (job_dir / TREND_METADATA_FILENAME).write_text(metadata_yaml, encoding="utf-8")
+    return release_dir
+
+
+def _render_release_trend_builder() -> None:
+    """Render the form that assembles full/usecase/devops summaries into one release.
+
+    On submit the metadata YAML is parsed and the release group is assembled. Failures
+    are reported inline. On success the script reruns so the inventory is rebuilt, and
+    the confirmation is carried across the rerun through ``st.session_state``.
+    """
+    flash_key = "release_trend_builder_flash"
+    section_header("Build Release Trend Group")
+
+    # Show the confirmation stored by the previous run. Anything rendered right before
+    # st.rerun() is discarded, so the message has to be emitted after the rerun.
+    flash_message = st.session_state.pop(flash_key, None)
+    if flash_message:
+        st.success(flash_message)
+
+    with st.expander("Assemble full/usecase/devops summaries into one release", expanded=False):
+        with st.form("release_trend_builder_form"):
+            form_col1, form_col2 = st.columns([1.1, 1.2])
+            with form_col1:
+                release_name = st.text_input(
+                    "Release folder name",
+                    value="trend_release_<full_job>_<usecase_job>_<devops_job>",
+                )
+                topic_name = st.text_input(
+                    "Topic folder",
+                    value="perception.object_recognition.objects",
+                )
+                full_source = st.text_input("Full summary source")
+                usecase_source = st.text_input("Usecase summary source")
+                devops_source = st.text_input("DevOps summary source")
+            with form_col2:
+                full_job_id = st.text_input("Full job id override", value="")
+                usecase_job_id = st.text_input("Usecase job id override", value="")
+                devops_job_id = st.text_input("DevOps job id override", value="")
+                metadata_text = st.text_area(
+                    "Release metadata YAML",
+                    value=DEFAULT_TREND_METADATA_TEXT,
+                    height=180,
+                    help="Required keys: tags, pilot_auto_version, data_count, description, date.",
+                )
+            submitted = st.form_submit_button("Create Release Trend Group", type="primary")
+
+        if submitted:
+            try:
+                metadata = parse_trend_metadata_text(metadata_text)
+                created_dir = _assemble_trend_release_group(
+                    release_name=release_name,
+                    topic_name=topic_name,
+                    role_sources={
+                        "full": full_source,
+                        "usecase": usecase_source,
+                        "devops": devops_source,
+                    },
+                    role_job_ids={
+                        "full": full_job_id,
+                        "usecase": usecase_job_id,
+                        "devops": devops_job_id,
+                    },
+                    metadata=metadata,
+                )
+            except Exception as exc:
+                # UI boundary: surface any failure to the user instead of crashing the page.
+                st.error(f"Could not create release trend group: {exc}")
+            else:
+                # The rerun stays outside the try block so its control-flow exception is
+                # never mistaken for a build failure.
+                st.session_state[flash_key] = (
+                    f"Created release trend group at `{path_display(created_dir)}`. Inventory refreshed."
+                )
+                st.rerun()
+
+
+def _release_display_name(version: Any, date: Any, description: Any = "") -> str:
+    """Build the display label ``version | date | description`` for a release.
+
+    A blank version becomes ``Unknown Version``; blank date or description parts are
+    left out together with their separator.
+    """
+    head = str(version or "").strip() or "Unknown Version"
+    extras = [str(date or "").strip(), str(description or "").strip()]
+    return head + "".join(f" | {part}" for part in extras if part)
+
+
+def _with_pass_rate(frame: pd.DataFrame, *, passed_col: str = "passed", total_col: str = "total") -> pd.DataFrame:
+    """Return a copy of ``frame`` with a ``pass_rate`` percentage column added.
+
+    Args:
+        frame: Source frame; it is not modified.
+        passed_col: Column holding the number of passed items.
+        total_col: Column holding the total number of items.
+
+    Returns:
+        A copy whose ``pass_rate`` column is ``passed / total * 100``. Rows with a zero
+        or non-numeric total (or non-numeric passed count) get NaN.
+    """
+    enriched = frame.copy()
+    total = pd.to_numeric(enriched[total_col], errors="coerce")
+    passed = pd.to_numeric(enriched[passed_col], errors="coerce")
+    # Mask zero totals with where() so the result stays a float column. The previous
+    # replace(0, pd.NA) switched the series to object dtype whenever a zero was present,
+    # which made the dtype of pass_rate depend on the data.
+    enriched["pass_rate"] = (passed / total.where(total != 0)) * 100.0
+    return enriched
+
+
+def _update_version_axis(fig: go.Figure, versions: list[str]) -> None:
+    """Set the x-axis category order of ``fig`` to the order given by ``versions``."""
+    # categoryorder="array" tells Plotly to take the order from categoryarray rather
+    # than from the order in which categories appear in the trace data.
+    fig.update_xaxes(categoryorder="array", categoryarray=versions)
+
+
+def _role_overview_url(release_row: dict[str, Any], role: str) -> str:
+    """Return the ``overview_url`` stored for ``role`` in ``release_row`` (``""`` if absent or empty)."""
+    roles_by_name = release_row.get("roles", {})
+    overview = roles_by_name.get(role, {}).get("overview_url")
+    return str(overview) if overview else ""
+
+
+def _role_debug_path(release_row: dict[str, Any], role: str) -> str:
+    """Return the ``absolute_path`` stored for ``role`` in ``release_row`` (``""`` if absent or empty)."""
+    roles_by_name = release_row.get("roles", {})
+    location = roles_by_name.get(role, {}).get("absolute_path")
+    return str(location) if location else ""
+
+
+def _role_evaluator_url(release_row: dict[str, Any], role: str) -> str:
+    """Return the ``evaluator_report_url`` stored for ``role`` in ``release_row`` (``""`` if absent or empty)."""
+    roles_by_name = release_row.get("roles", {})
+    report_url = roles_by_name.get(role, {}).get("evaluator_report_url")
+    return str(report_url) if report_url else ""
+
+
+def _topic_family(topic_name: Any) -> str:
+    """Classify a topic name into the family label used to group release metrics.
+
+    The objects topic maps to ``Perception Performance``, any detection sub-topic to
+    ``ML Model Performance`` and everything else (including blank input) to ``Other``.
+    """
+    topic = str(topic_name or "")
+    if topic.startswith("perception.object_recognition.detection."):
+        return "ML Model Performance"
+    return "Perception Performance" if topic == "perception.object_recognition.objects" else "Other"
+
+
+def _date_sort_value(value: Any) -> float:
+    """Convert a ``YYYY.MM.DD`` date into a numeric sort key.
+
+    Returns the parsed date's timestamp, or ``-1.0`` when the value cannot be parsed
+    with that format.
+    """
+    stamp = pd.to_datetime(value, format="%Y.%m.%d", errors="coerce")
+    return -1.0 if pd.isna(stamp) else float(stamp.timestamp())
+
+
+def _html_link(url: str, label: str, variant: str = "action") -> str:
+    """Return an HTML link chip for ``url``, or a muted dash placeholder when ``url`` is empty.
+
+    ``url``, ``label`` and ``variant`` are HTML-escaped. The link opens in a new tab.
+    """
+    if url:
+        css_class = f"link-chip link-chip-{escape(variant, quote=True)}"
+        href = escape(url, quote=True)
+        text = escape(label)
+        return f'<a class="{css_class}" href="{href}" target="_blank" rel="noopener noreferrer">{text}</a>'
+    return '<span class="muted-cell">-</span>'
+
+
+def _pdf_links_for_prefix(release: dict[str, Any], prefix: str) -> str:
+    """Return the HTML link chips for every PDF of ``release`` whose topic starts with ``prefix``.
+
+    Detection topics are labelled with their model name (detection prefix and
+    ``.objects`` suffix removed, known model names capitalized); other matches are
+    labelled ``Prediction``. A muted dash is returned when nothing matches.
+    """
+    detection_prefix = "perception.object_recognition.detection."
+    chips: list[str] = []
+    for pdf in release.get("pdfs", []):
+        topic = str(pdf.get("topic") or "")
+        # startswith() also covers the case where the topic equals the prefix.
+        if not topic.startswith(prefix):
+            continue
+        if topic.startswith(detection_prefix):
+            label = topic.replace(detection_prefix, "").replace(".objects", "")
+            label = label.replace("bevfusion", "BEVFusion").replace("centerpoint", "CenterPoint")
+        else:
+            label = "Prediction"
+        chips.append(_html_link(str(pdf.get("static_url") or ""), label, "pdf"))
+    if not chips:
+        return '<span class="muted-cell">-</span>'
+    return '<span class="link-chip-row">' + "".join(chips) + "</span>"
+
+
+def _has_pdf_for_prefix(release: dict[str, Any], prefix: str) -> bool:
+    """Report whether ``release`` lists at least one PDF whose topic starts with ``prefix``."""
+    # startswith() also covers the case where the topic equals the prefix.
+    return any(str(pdf.get("topic") or "").startswith(prefix) for pdf in release.get("pdfs", []))
+
+
+def _render_release_library_table(releases: list[dict[str, Any]]) -> None:
+    """Render the release library as a sortable HTML table inside a Streamlit component.
+
+    Every release dict contributes one row: release info (version, date, description,
+    data count), overview links, specsheet PDF links and evaluator job links. Only the
+    four release-info columns are sortable. The embedded script sorts rows client-side
+    and starts with the date column in descending order.
+
+    Args:
+        releases: Release inventory rows, as consumed by the ``_role_*`` and
+            ``_pdf_links_for_prefix`` helpers.
+    """
+    group_headers = [
+        ("Release", 4),
+        ("Overview", 3),
+        ("Specsheet PDF", 2),
+        ("Evaluator Job", 3),
+    ]
+    col_widths = [360, 96, 240, 92, 96, 96, 96, 128, 168, 96, 96, 96]
+    headers = [
+        "Version",
+        "Date",
+        "Description",
+        "Data",
+        "Performance",
+        "Usecase",
+        "DevOps",
+        "Prediction",
+        "Detection",
+        "Performance",
+        "Usecase",
+        "DevOps",
+    ]
+    sort_types = ["text", "date", "text", "number", "text", "text", "text", "text", "text", "text", "text", "text"]
+    sortable_columns = {0, 1, 2, 3}
+    role_names = ("performance", "usecase", "devops")
+    prediction_prefix = "perception.object_recognition.objects"
+    detection_prefix = "perception.object_recognition.detection."
+
+    rows_html = []
+    for release in releases:
+        # Look the role URLs up once; they feed both the sort keys and the link cells.
+        overview_urls = [_role_overview_url(release, role) for role in role_names]
+        evaluator_urls = [_role_evaluator_url(release, role) for role in role_names]
+        parsed_count = _parse_data_count(release.get("data_count"))
+        sort_values = [
+            str(release.get("version") or ""),
+            str(_date_sort_value(release.get("date"))),
+            str(release.get("description") or ""),
+            # -1 marks a missing count; a real count of 0 keeps its own sort position.
+            str(-1 if parsed_count is None else parsed_count),
+            *("open" if url else "" for url in overview_urls),
+            "prediction" if _has_pdf_for_prefix(release, prediction_prefix) else "",
+            "detection" if _has_pdf_for_prefix(release, detection_prefix) else "",
+            *("report" if url else "" for url in evaluator_urls),
+        ]
+        cells = [
+            escape(str(release.get("version") or "")),
+            escape(str(release.get("date") or "")),
+            escape(str(release.get("description") or "")),
+            escape(str(release.get("data_count") or "")),
+            *(_html_link(url, "Open", "overview") for url in overview_urls),
+            _pdf_links_for_prefix(release, prediction_prefix),
+            _pdf_links_for_prefix(release, detection_prefix),
+            *(_html_link(url, "Report", "job") for url in evaluator_urls),
+        ]
+        rows_html.append(
+            "<tr>"
+            + "".join(
+                f'<td data-sort-value="{escape(sort_value, quote=True)}">{cell}</td>'
+                for cell, sort_value in zip(cells, sort_values)
+            )
+            + "</tr>"
+        )
+
+    colgroup_html = "".join(f'<col style="width:{width}px">' for width in col_widths)
+    group_header_html = "".join(
+        f'<th colspan="{span}">{escape(header)}</th>' for header, span in group_headers
+    )
+    column_header_cells = []
+    for idx, header in enumerate(headers):
+        if idx in sortable_columns:
+            column_header_cells.append(
+                f'<th><button class="sort-button" type="button" data-index="{idx}" '
+                f'data-type="{sort_types[idx]}">{escape(header)}</button></th>'
+            )
+        else:
+            column_header_cells.append(f'<th><span class="plain-header">{escape(header)}</span></th>')
+    column_header_html = "".join(column_header_cells)
+
+    # Literal braces of the CSS/JS below are doubled because this is an f-string.
+    table_html = f"""
+<!doctype html>
+<html>
+<head>
+<meta charset="utf-8">
+<style>
+* {{
+  box-sizing: border-box;
+}}
+body {{
+  margin: 0;
+  padding: 0 0 10px 0;
+  background: transparent;
+  color: #0f172a;
+  font-family: "Source Sans Pro", system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif;
+}}
+.release-library-shell {{
+  background: transparent;
+}}
+.release-library-table-wrapper {{
+  overflow-x: auto;
+  overflow-y: visible;
+  width: 100%;
+  border: 1px solid rgba(148, 163, 184, 0.28);
+  border-radius: 10px;
+}}
+.release-library-table {{
+  border-collapse: separate;
+  border-spacing: 0;
+  table-layout: fixed;
+  min-width: 1660px;
+  width: 100%;
+  font-size: 0.88rem;
+}}
+.release-library-table th,
+.release-library-table td {{
+  border-bottom: 1px solid rgba(148, 163, 184, 0.28);
+  padding: 0.34rem 0.5rem;
+  text-align: left;
+  vertical-align: middle;
+  line-height: 1.22;
+  white-space: nowrap;
+}}
+.release-library-table th {{
+  background: #f8fafc;
+  color: #334155;
+  font-weight: 700;
+  white-space: nowrap;
+}}
+.release-library-table .group-header th {{
+  position: sticky;
+  top: 0;
+  z-index: 3;
+  background: #eef2ff;
+  color: #3730a3;
+  text-align: center;
+  font-size: 0.78rem;
+  text-transform: uppercase;
+  letter-spacing: 0.04em;
+  border-right: 1px solid rgba(129, 140, 248, 0.22);
+}}
+.release-library-table .column-header th {{
+  position: sticky;
+  top: 29px;
+  z-index: 3;
+  background: #f8fafc;
+  font-size: 0.82rem;
+  text-align: center;
+  padding: 0;
+}}
+.sort-button {{
+  appearance: none;
+  display: flex;
+  align-items: center;
+  justify-content: center;
+  width: 100%;
+  min-height: 30px;
+  padding: 0.26rem 0.38rem;
+  border: 0;
+  background: transparent;
+  color: #334155;
+  font: inherit;
+  font-weight: 750;
+  cursor: pointer;
+}}
+.sort-button:hover {{
+  background: rgba(248, 250, 252, 0.92);
+  color: #334155;
+}}
+.sort-button[data-dir="asc"]::after {{
+  content: "▲";
+  margin-left: 0.35rem;
+  color: #2563eb;
+  font-size: 0.64rem;
+}}
+.sort-button[data-dir="desc"]::after {{
+  content: "▼";
+  margin-left: 0.35rem;
+  color: #2563eb;
+  font-size: 0.64rem;
+}}
+.plain-header {{
+  display: flex;
+  align-items: center;
+  justify-content: center;
+  min-height: 30px;
+  padding: 0.26rem 0.38rem;
+  font-weight: 750;
+}}
+.release-library-table tbody tr:hover td {{
+  background: rgba(248, 250, 252, 0.82);
+}}
+.release-library-table td:nth-child(1) {{
+  font-weight: 650;
+  color: #0f172a;
+}}
+.release-library-table td:nth-child(3) {{
+  color: #475569;
+  overflow: hidden;
+  text-overflow: ellipsis;
+}}
+.release-library-table td:nth-child(2),
+.release-library-table td:nth-child(4) {{
+  color: #475569;
+}}
+.release-library-table td:nth-child(n+5) {{
+  text-align: center;
+}}
+.link-chip {{
+  display: inline-flex;
+  align-items: center;
+  justify-content: center;
+  min-width: 64px;
+  min-height: 22px;
+  padding: 0.08rem 0.46rem;
+  margin: 0.03rem 0;
+  border-radius: 999px;
+  font-weight: 650;
+  font-size: 0.8rem;
+  text-decoration: none;
+  border: 1px solid transparent;
+}}
+.link-chip-row {{
+  display: inline-flex;
+  align-items: center;
+  justify-content: center;
+  gap: 0.22rem;
+  flex-wrap: nowrap;
+}}
+.link-chip-overview {{
+  color: #1d4ed8;
+  background: #eff6ff;
+  border-color: #bfdbfe;
+}}
+.link-chip-pdf {{
+  color: #9f1239;
+  background: #fff1f2;
+  border-color: #fecdd3;
+}}
+.link-chip-job {{
+  color: #166534;
+  background: #f0fdf4;
+  border-color: #bbf7d0;
+}}
+.link-chip:hover {{
+  text-decoration: underline;
+  filter: brightness(0.98);
+}}
+.muted-cell {{
+  color: #94a3b8;
+}}
+</style>
+</head>
+<body>
+<div class="release-library-shell">
+  <div class="release-library-table-wrapper">
+    <table id="releaseLibraryTable" class="release-library-table">
+      <colgroup>{colgroup_html}</colgroup>
+      <thead>
+        <tr class="group-header">{group_header_html}</tr>
+        <tr class="column-header">{column_header_html}</tr>
+      </thead>
+      <tbody>{''.join(rows_html)}</tbody>
+    </table>
+  </div>
+</div>
+<script>
+(function () {{
+  const table = document.getElementById("releaseLibraryTable");
+  const tbody = table.querySelector("tbody");
+  const buttons = Array.from(table.querySelectorAll(".sort-button"));
+  let activeSort = {{ index: 1, dir: "desc", type: "date" }};
+
+  function allRows() {{
+    return Array.from(tbody.querySelectorAll("tr"));
+  }}
+
+  function cellValue(row, index, type) {{
+    const cell = row.children[index];
+    const raw = (cell && (cell.dataset.sortValue || cell.innerText) || "").trim();
+    if (type === "number" || type === "date") {{
+      const value = Number(raw.replace(/,/g, ""));
+      return Number.isFinite(value) ? value : -Infinity;
+    }}
+    return raw.toLowerCase();
+  }}
+
+  function compareRows(a, b, sort) {{
+    const av = cellValue(a, sort.index, sort.type);
+    const bv = cellValue(b, sort.index, sort.type);
+    if (av < bv) return sort.dir === "asc" ? -1 : 1;
+    if (av > bv) return sort.dir === "asc" ? 1 : -1;
+    return 0;
+  }}
+
+  function applySort() {{
+    const rows = allRows();
+    rows.sort((a, b) => compareRows(a, b, activeSort));
+    rows.forEach((row) => tbody.appendChild(row));
+    buttons.forEach((button) => {{
+      const isActive = Number(button.dataset.index) === activeSort.index;
+      button.dataset.dir = isActive ? activeSort.dir : "";
+    }});
+  }}
+
+  buttons.forEach((button) => {{
+    button.addEventListener("click", () => {{
+      const nextIndex = Number(button.dataset.index);
+      const nextType = button.dataset.type || "text";
+      const nextDir = activeSort.index === nextIndex && activeSort.dir === "asc" ? "desc" : "asc";
+      activeSort = {{ index: nextIndex, dir: nextDir, type: nextType }};
+      applySort();
+    }});
+  }});
+
+  applySort();
+}})();
+</script>
+</body>
+</html>
+"""
+    # Fixed header allowance plus one row height per release (at least one row).
+    component_height = 78 + max(1, len(releases)) * 32
+    components.html(table_html, height=component_height, scrolling=False)
+
+
+def _release_inventory_debug_rows(releases: list[dict[str, Any]]) -> list[dict[str, Any]]:
+    """Flatten release inventory entries into one debug row per release.
+
+    Each row carries the release identity, the release directory, the per-role
+    directories and evaluator job URLs, and the PDF paths joined with newlines.
+    """
+
+    def _debug_row(release: dict[str, Any]) -> dict[str, Any]:
+        pdf_paths = [pdf["absolute_path"] for pdf in release.get("pdfs", [])]
+        return {
+            "version": release["version"],
+            "date": release["date"],
+            "release": release["release"],
+            "release_dir": release["release_dir_absolute"],
+            "performance_dir": _role_debug_path(release, "performance"),
+            "usecase_dir": _role_debug_path(release, "usecase"),
+            "devops_dir": _role_debug_path(release, "devops"),
+            "performance_job_url": _role_evaluator_url(release, "performance"),
+            "usecase_job_url": _role_evaluator_url(release, "usecase"),
+            "devops_job_url": _role_evaluator_url(release, "devops"),
+            "pdf_paths": "\n".join(pdf_paths),
+        }
+
+    return [_debug_row(release) for release in releases]
+
+
+def _release_metric_bar_ranges(frame: pd.DataFrame) -> dict[str, tuple[float, float]]:
+    """Compute the ``(min, max)`` range per metric column, used to scale the in-cell bars.
+
+    Columns that are absent or hold no numeric value are left out. A column whose
+    values are all equal falls back to a nominal range: 0-100 for the pass rate, 0-1
+    for the score metrics and 0 to ``max(value, 1)`` for the remaining metrics.
+    """
+    unit_scores = {"mAP", "precision", "recall"}
+    bar_ranges: dict[str, tuple[float, float]] = {}
+    for name in ("mAP", "precision", "recall", "overall_pass_rate", "FNR", "x_error", "y_error", "yaw_error"):
+        if name not in frame.columns:
+            continue
+        series = pd.to_numeric(frame[name], errors="coerce")
+        if series.isna().all():
+            continue
+        low = float(series.min(skipna=True))
+        high = float(series.max(skipna=True))
+        if abs(high - low) < 1e-12:
+            # Flat column: there is no spread to scale against, so use a nominal range.
+            if name == "overall_pass_rate":
+                low, high = 0.0, 100.0
+            elif name in unit_scores:
+                low, high = 0.0, 1.0
+            else:
+                low, high = 0.0, max(high, 1.0)
+        bar_ranges[name] = (low, high)
+    return bar_ranges
+
+
+def _release_performance_cell_html(value: Any, column: str, ranges: dict[str, tuple[float, float]]) -> str:
+    """Return the inner HTML for one cell of the release performance table.
+
+    Args:
+        value: Raw cell value.
+        column: Column name; decides whether the cell is rendered as a metric bar.
+        ranges: ``(min, max)`` per metric column, used to scale the bar. Columns
+            missing from the mapping are scaled against ``(0.0, 1.0)``.
+
+    Returns:
+        Escaped text for non-metric columns (empty for missing values), a muted dash
+        for metric cells without a numeric value, otherwise a bar cell whose width and
+        colour reflect where the value sits inside the column's range.
+    """
+    metric_columns = {"mAP", "precision", "recall", "overall_pass_rate", "FNR", "x_error", "y_error", "yaw_error"}
+    if column not in metric_columns:
+        # Missing scalars (None, NaN, NaT, pd.NA) render as empty text. Without this
+        # check NaN shows up as the literal "nan" and pd.NA raises on ``value or ""``.
+        # is_scalar() keeps list-like values away from pd.isna(), which would return
+        # an array for them.
+        if pd.api.types.is_scalar(value) and pd.isna(value):
+            return ""
+        return escape(str(value or ""))
+
+    numeric = pd.to_numeric(pd.Series([value]), errors="coerce").iloc[0]
+    if pd.isna(numeric):
+        return '<span class="perf-muted">-</span>'
+
+    min_value, max_value = ranges.get(column, (0.0, 1.0))
+    # Guard against a zero-width range before normalizing into [0, 1].
+    span = max(max_value - min_value, 1e-12)
+    normalized = max(0.0, min(1.0, (float(numeric) - min_value) / span))
+    # Keep a small bar (8%) visible even for the lowest value of the column.
+    pct = 8.0 + normalized * 92.0
+    if column == "overall_pass_rate":
+        label = f"{float(numeric):.1f}%"
+    else:
+        label = f"{float(numeric):.3f}"
+
+    # Calm app-aligned palette: soft rose for weak/concerning values, soft teal for strong/healthy values.
+    teal = (45, 212, 191)
+    rose = (251, 113, 133)
+    if column in {"mAP", "precision", "recall", "overall_pass_rate"}:
+        # Higher is better for these columns.
+        color_ratio = normalized
+    else:
+        # Remaining metric columns are treated as lower-is-better, so the scale is inverted.
+        color_ratio = 1.0 - normalized
+    red = round(rose[0] + (teal[0] - rose[0]) * color_ratio)
+    green = round(rose[1] + (teal[1] - rose[1]) * color_ratio)
+    blue = round(rose[2] + (teal[2] - rose[2]) * color_ratio)
+
+    return (
+        f'<div class="perf-bar-cell" '
+        f'style="--bar-width:{pct:.1f}%; --bar-r:{red}; --bar-g:{green}; --bar-b:{blue};">'
+        f'<span>{escape(label)}</span>'
+        "</div>"
+    )
+
+
+def _release_performance_column_group(column: str) -> str:
+    """Return the header group a release performance column belongs to.
+
+    Columns outside the Release, Score, Error and Pass Rate groups fall under
+    ``Jobs / Metadata``.
+    """
+    group_members = (
+        ("Release", ("version", "date", "description", "data_count")),
+        ("Score", ("mAP", "precision", "recall")),
+        ("Error", ("FNR", "x_error", "y_error", "yaw_error")),
+        ("Pass Rate", ("overall_pass_rate",)),
+    )
+    for group_name, members in group_members:
+        if column in members:
+            return group_name
+    return "Jobs / Metadata"
+
+
+def _render_release_performance_html_table(frame: pd.DataFrame) -> None:
+    """Render ``frame`` as a sortable HTML table with in-cell metric bars.
+
+    Every column of ``frame`` becomes a sortable table column, grouped under the
+    header returned by ``_release_performance_column_group``. Metric cells are drawn
+    as bars via ``_release_performance_cell_html``. The embedded script implements
+    client-side sorting and rectangular cell selection by mouse drag.
+
+    Args:
+        frame: Release performance rows, one table row per frame row.
+    """
+    ranges = _release_metric_bar_ranges(frame)
+    # Columns whose sort key is numeric; every other column sorts as text.
+    numeric_columns = {"mAP", "precision", "recall", "overall_pass_rate", "FNR", "x_error", "y_error", "yaw_error", "data_count"}
+    # Merge consecutive columns of the same group into one (group, colspan) entry.
+    group_spans: list[tuple[str, int]] = []
+    for column in frame.columns:
+        group = _release_performance_column_group(str(column))
+        if group_spans and group_spans[-1][0] == group:
+            group_spans[-1] = (group, group_spans[-1][1] + 1)
+        else:
+            group_spans.append((group, 1))
+    group_header_html = "".join(
+        f'<th class="perf-group-header" colspan="{span}">{escape(group)}</th>'
+        for group, span in group_spans
+    )
+    header_html = "".join(
+        (
+            f'<th><button class="perf-sort-button" type="button" data-index="{idx}" '
+            f'data-type="{"number" if column in numeric_columns else "text"}">{escape(str(column))}</button></th>'
+        )
+        for idx, column in enumerate(frame.columns)
+    )
+    row_html = []
+    for _, row in frame.iterrows():
+        cells = []
+        for column in frame.columns:
+            value = row.get(column)
+            # data-sort-value holds the raw sort key; missing numbers become "".
+            if column == "data_count":
+                parsed_count = _parse_data_count(value)
+                sort_value = "" if parsed_count is None else str(parsed_count)
+            elif column in numeric_columns:
+                numeric = pd.to_numeric(pd.Series([value]), errors="coerce").iloc[0]
+                sort_value = "" if pd.isna(numeric) else f"{float(numeric):.12g}"
+            else:
+                sort_value = str(value or "")
+            # len(row_html) / len(cells) are the indexes of the row and cell being built.
+            cells.append(
+                f'<td class="perf-selectable-td {"perf-metric-td" if column in ranges else ""}" '
+                f'data-row="{len(row_html)}" data-col="{len(cells)}" '
+                f'data-sort-value="{escape(sort_value, quote=True)}">'
+                f"{_release_performance_cell_html(value, column, ranges)}</td>"
+            )
+        row_html.append(f"<tr>{''.join(cells)}</tr>")
+
+    # Literal braces of the CSS/JS below are doubled because this is an f-string.
+    table_html = f"""
+<!doctype html>
+<html>
+<head>
+<meta charset="utf-8">
+<style>
+* {{
+  box-sizing: border-box;
+}}
+body {{
+  margin: 0;
+  padding: 0;
+  background: transparent;
+  color: #0f172a;
+  font-family: "Source Sans Pro", system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif;
+}}
+.release-perf-table-wrap {{
+  overflow-x: auto;
+  overflow-y: visible;
+  border: 1px solid rgba(148, 163, 184, 0.28);
+  border-radius: 10px;
+}}
+.release-perf-table {{
+  border-collapse: separate;
+  border-spacing: 0;
+  min-width: 1280px;
+  width: 100%;
+  font-size: 0.86rem;
+  user-select: none;
+}}
+.release-perf-table th,
+.release-perf-table td {{
+  border-bottom: 1px solid rgba(148, 163, 184, 0.22);
+  padding: 0.34rem 0.48rem;
+  text-align: left;
+  vertical-align: middle;
+  white-space: nowrap;
+}}
+.release-perf-table th {{
+  position: sticky;
+  z-index: 2;
+  background: #f8fafc;
+  color: #334155;
+  font-weight: 750;
+  padding: 0;
+}}
+.release-perf-table .perf-group-header {{
+  top: 0;
+  z-index: 3;
+  padding: 0.3rem 0.48rem;
+  text-align: center;
+  background: #eef2ff;
+  color: #3730a3;
+  font-size: 0.76rem;
+  text-transform: uppercase;
+  letter-spacing: 0.04em;
+  border-right: 1px solid rgba(129, 140, 248, 0.20);
+}}
+.release-perf-table .perf-column-header th {{
+  top: 30px;
+}}
+.perf-sort-button {{
+  appearance: none;
+  display: flex;
+  align-items: center;
+  justify-content: flex-start;
+  width: 100%;
+  min-height: 32px;
+  padding: 0.34rem 0.48rem;
+  border: 0;
+  background: transparent;
+  color: #334155;
+  font: inherit;
+  font-weight: 750;
+  cursor: pointer;
+}}
+.perf-sort-button:hover {{
+  background: rgba(219, 234, 254, 0.62);
+}}
+.perf-sort-button[data-dir="asc"]::after {{
+  content: "▲";
+  margin-left: 0.35rem;
+  color: #2563eb;
+  font-size: 0.64rem;
+}}
+.perf-sort-button[data-dir="desc"]::after {{
+  content: "▼";
+  margin-left: 0.35rem;
+  color: #2563eb;
+  font-size: 0.64rem;
+}}
+.release-perf-table tbody tr:hover td {{
+  background: rgba(248, 250, 252, 0.82);
+}}
+.release-perf-table td.perf-selected-cell {{
+  outline: 1.5px solid #2563eb;
+  outline-offset: -2px;
+  background: rgba(219, 234, 254, 0.58) !important;
+}}
+.release-perf-table td.perf-selected-cell .perf-bar-cell {{
+  box-shadow: inset 0 0 0 999px rgba(219, 234, 254, 0.34);
+}}
+.release-perf-table td.perf-selection-anchor {{
+  outline: 2px solid #1d4ed8;
+  outline-offset: -2px;
+}}
+.release-perf-table .perf-metric-td {{
+  padding: 0;
+  min-width: 86px;
+  text-align: right;
+}}
+.perf-bar-cell {{
+  position: relative;
+  min-height: 32px;
+  height: 100%;
+  display: flex;
+  align-items: center;
+  justify-content: flex-end;
+  padding: 0 0.5rem;
+  font-variant-numeric: tabular-nums;
+  font-weight: 700;
+  color: #0f172a;
+  overflow: hidden;
+}}
+.perf-bar-cell::before {{
+  content: "";
+  position: absolute;
+  inset: 0 auto 0 0;
+  width: var(--bar-width);
+  z-index: 0;
+  background: linear-gradient(
+    90deg,
+    rgba(var(--bar-r), var(--bar-g), var(--bar-b), 0.34),
+    rgba(var(--bar-r), var(--bar-g), var(--bar-b), 0.15)
+  );
+}}
+.perf-bar-cell span {{
+  position: relative;
+  z-index: 1;
+}}
+.perf-muted {{
+  color: #94a3b8;
+  display: block;
+  padding: 0.34rem 0.5rem;
+}}
+</style>
+</head>
+<body>
+<div class="release-perf-table-wrap">
+  <table id="releasePerfTable" class="release-perf-table">
+    <thead>
+      <tr>{group_header_html}</tr>
+      <tr class="perf-column-header">{header_html}</tr>
+    </thead>
+    <tbody>{''.join(row_html)}</tbody>
+  </table>
+</div>
+<script>
+(function () {{
+  const table = document.getElementById("releasePerfTable");
+  const tbody = table.querySelector("tbody");
+  const buttons = Array.from(table.querySelectorAll(".perf-sort-button"));
+  const cells = Array.from(table.querySelectorAll("td.perf-selectable-td"));
+  let activeSort = null;
+  let isSelecting = false;
+  let selectionAnchor = null;
+
+  function rows() {{
+    return Array.from(tbody.querySelectorAll("tr"));
+  }}
+
+  function cellValue(row, index, type) {{
+    const cell = row.children[index];
+    const raw = (cell && (cell.dataset.sortValue || cell.innerText) || "").trim();
+    if (type === "number") {{
+      const value = Number(raw.replace(/,/g, ""));
+      return Number.isFinite(value) ? value : -Infinity;
+    }}
+    return raw.toLowerCase();
+  }}
+
+  function applySort() {{
+    if (!activeSort) return;
+    clearSelection();
+    const sortedRows = rows().sort((a, b) => {{
+      const av = cellValue(a, activeSort.index, activeSort.type);
+      const bv = cellValue(b, activeSort.index, activeSort.type);
+      if (av < bv) return activeSort.dir === "asc" ? -1 : 1;
+      if (av > bv) return activeSort.dir === "asc" ? 1 : -1;
+      return 0;
+    }});
+    sortedRows.forEach((row) => tbody.appendChild(row));
+    buttons.forEach((button) => {{
+      const isActive = Number(button.dataset.index) === activeSort.index;
+      button.dataset.dir = isActive ? activeSort.dir : "";
+    }});
+  }}
+
+  buttons.forEach((button) => {{
+    button.addEventListener("click", () => {{
+      const nextIndex = Number(button.dataset.index);
+      const nextType = button.dataset.type || "text";
+      const nextDir = activeSort && activeSort.index === nextIndex && activeSort.dir === "asc" ? "desc" : "asc";
+      activeSort = {{ index: nextIndex, type: nextType, dir: nextDir }};
+      applySort();
+    }});
+  }});
+
+  function clearSelection() {{
+    cells.forEach((cell) => {{
+      cell.classList.remove("perf-selected-cell");
+      cell.classList.remove("perf-selection-anchor");
+    }});
+  }}
+
+  function cellPosition(cell) {{
+    return {{
+      row: rows().indexOf(cell.parentElement),
+      col: cell.cellIndex,
+    }};
+  }}
+
+  function selectRange(anchorCell, targetCell, additive) {{
+    if (!anchorCell || !targetCell) return;
+    if (!additive) clearSelection();
+    const anchor = cellPosition(anchorCell);
+    const target = cellPosition(targetCell);
+    const rowMin = Math.min(anchor.row, target.row);
+    const rowMax = Math.max(anchor.row, target.row);
+    const colMin = Math.min(anchor.col, target.col);
+    const colMax = Math.max(anchor.col, target.col);
+    rows().forEach((row, rowIndex) => {{
+      if (rowIndex < rowMin || rowIndex > rowMax) return;
+      Array.from(row.children).forEach((cell, colIndex) => {{
+        if (colIndex >= colMin && colIndex <= colMax) {{
+          cell.classList.add("perf-selected-cell");
+        }}
+      }});
+    }});
+    anchorCell.classList.add("perf-selection-anchor");
+  }}
+
+  cells.forEach((cell) => {{
+    cell.addEventListener("mousedown", (event) => {{
+      isSelecting = true;
+      selectionAnchor = cell;
+      selectRange(selectionAnchor, cell, event.ctrlKey || event.metaKey);
+      event.preventDefault();
+    }});
+    cell.addEventListener("mouseenter", () => {{
+      if (isSelecting) {{
+        selectRange(selectionAnchor, cell, false);
+      }}
+    }});
+  }});
+
+  document.addEventListener("mouseup", () => {{
+    isSelecting = false;
+    selectionAnchor = null;
+  }});
+}})();
+</script>
+</body>
+</html>
+"""
+    # Fixed header allowance plus one row height per frame row (at least one row).
+    component_height = 76 + max(1, len(frame)) * 34
+    components.html(table_html, height=component_height, scrolling=False)
+
+
+def _release_performance_table(
+    frame: pd.DataFrame,
+    *,
+    family: str,
+    empty_message: str,
+    table_mode: str,
+) -> None:
+    if frame.empty:
+        st.info(empty_message)
+        return
+    view = frame[frame["topic_family"] == family].copy()
+    if view.empty:
+        st.info(empty_message)
+        return
+    columns = [
+        "version",
+        "date",
+        "description",
+        "data_count",
+        "mAP",
+        "precision",
+        "recall",
+        "FNR",
+        "x_error",
+        "y_error",
+        "yaw_error",
+        "roles",
+        "full_job_id",
+        "usecase_job_id",
+        "devops_job_id",
+        "topic_name",
+    ]
+    if family == "Perception Performance":
+        columns.insert(columns.index("roles"), "overall_pass_rate")
+    visible = [column for column in columns if column in view.columns]
+    display_frame = view.sort_values(["date_sort", "version", "release_name"], ascending=[False, False, False])[visible]
+    if table_mode == "Colored bars":
+        _render_release_performance_html_table(display_frame)
+    else:
+        dataframe_height = 52 + max(1, len(display_frame)) * 36
+        dataframe_column_config = {
+            "version": st.column_config.TextColumn("version", width="large"),
+            "description": st.column_config.TextColumn("description", width="medium"),
+            "full_job_id": st.column_config.TextColumn("full_job_id", width="large"),
+            "usecase_job_id": st.column_config.TextColumn("usecase_job_id", width="large"),
+            "devops_job_id": st.column_config.TextColumn("devops_job_id", width="large"),
+            "topic_name": st.column_config.TextColumn("topic_name", width="large"),
+        }
+        st.dataframe(
+            display_frame,
+            width="stretch",
+            hide_index=True,
+            height=dataframe_height,
+            column_config={key: value for key, value in dataframe_column_config.items() if key in display_frame.columns},
+        )
+
+
+def _build_pass_combo_chart(
+    frame: pd.DataFrame,
+    *,
+    title: str,
+    versions: list[str],
+    line_y_col: str = "pass_rate",
+    series_col: str | None = None,
+    scenario_count_col: str = "total",
+    hover_cols: list[str] | None = None,
+) -> go.Figure:
+    fig = go.Figure()
+    show_legend = series_col is not None
+    scenario_totals = (
+        frame.groupby("version", dropna=False)[scenario_count_col]
+        .sum()
+        .reindex(versions)
+        .fillna(0)
+    )
+    fig.add_bar(
+        x=versions,
+        y=scenario_totals.tolist(),
+        name="Scenario Count",
+        marker_color="#bfdbfe",
+        opacity=0.32,
+        yaxis="y2",
+        hovertemplate="<b>%{x}</b><br>Scenario Count: %{y:.0f}<extra></extra>",
+    )
+
+    hover_cols = hover_cols or ["date", "release_name", "passed", "total"]
+    plot_df = frame.copy()
+    version_order = {version: idx for idx, version in enumerate(versions)}
+    plot_df["__version_order"] = plot_df["version"].map(version_order).fillna(len(version_order))
+    plot_df = plot_df.sort_values(["__version_order", "version", "date", "release_name"])
+    if series_col is None:
+        fig.add_trace(
+            go.Scatter(
+                x=plot_df["version"],
+                y=plot_df[line_y_col],
+                name=title,
+                mode="lines+markers",
+                line=dict(color="#1d4ed8", width=3),
+                marker=dict(size=8, color="#1d4ed8"),
+                customdata=plot_df[hover_cols].to_numpy() if hover_cols else None,
+                hovertemplate="<b>%{x}</b><br>Pass Rate: %{y:.1f}%<br>Date: %{customdata[0]}<br>Release: %{customdata[1]}<extra></extra>",
+            )
+        )
+    else:
+        palette = px.colors.qualitative.Bold + px.colors.qualitative.Safe + px.colors.qualitative.Set2
+        for idx, series_name in enumerate(plot_df[series_col].dropna().astype(str).unique().tolist()):
+            series_df = plot_df[plot_df[series_col].astype(str) == series_name].sort_values(
+                ["__version_order", "version", "date", "release_name"]
+            )
+            color = palette[idx % len(palette)]
+            fig.add_trace(
+                go.Scatter(
+                    x=series_df["version"],
+                    y=series_df[line_y_col],
+                    name=series_name,
+                    mode="lines+markers",
+                    line=dict(color=color, width=3),
+                    marker=dict(size=7, color=color),
+                    customdata=series_df[hover_cols].to_numpy() if hover_cols else None,
+                    hovertemplate=(
+                        "<b>%{x}</b><br>"
+                        + f"{series_col.replace('_', ' ').title()}: {series_name}<br>"
+                        + "Pass Rate: %{y:.1f}%<br>"
+                        + "Date: %{customdata[0]}<br>"
+                        + "Release: %{customdata[1]}<br>"
+                        + "Passed: %{customdata[2]:.0f}<br>"
+                        + "Total: %{customdata[3]:.0f}<extra></extra>"
+                    ),
+                )
+            )
+
+    fig.update_layout(
+        title=title,
+        xaxis_title="Pilot.Auto Version",
+        yaxis_title="Pass Rate (%)",
+        yaxis2=dict(title="Scenario Count", overlaying="y", side="right", showgrid=False),
+        height=440,
+        showlegend=show_legend,
+        legend=dict(orientation="h", yanchor="top", y=-0.22, x=0, xanchor="left"),
+        margin=dict(l=20, r=20, t=80, b=90),
+        plot_bgcolor="#ffffff",
+        paper_bgcolor="#ffffff",
+    )
+    fig.update_xaxes(showgrid=False, categoryorder="array", categoryarray=versions)
+    fig.update_yaxes(range=[0, 100], gridcolor="rgba(148, 163, 184, 0.18)")
+    return fig
+
+
+def _build_defect_hierarchy_bars(
+    frame: pd.DataFrame,
+    *,
+    category_cols: list[str],
+    title: str,
+    color_col: str = "major_category",
+    label_cols: list[str] | None = None,
+    color_map: dict[str, str] | None = None,
+) -> go.Figure:
+    bars = frame.copy()
+    for category_col in category_cols:
+        bars[category_col] = bars[category_col].fillna("Unspecified")
+    label_cols = label_cols or category_cols
+    bars["full_label"] = bars[label_cols].astype(str).agg(" / ".join, axis=1)
+    bars["label"] = bars["full_label"]
+    bars = bars.sort_values(category_cols + ["pass_rate", "total"], ascending=[True] * len(category_cols) + [False, False])
+    fig = px.bar(
+        bars,
+        x="label",
+        y="pass_rate",
+        color=color_col,
+        color_discrete_map=color_map,
+        hover_data={"label": False, "full_label": True, "passed": True, "total": True},
+        text=bars["pass_rate"].map(lambda value: f"{value:.1f}%" if pd.notna(value) else "n/a"),
+        title=title,
+    )
+    fig.update_layout(
+        height=500,
+        margin=dict(l=20, r=20, t=70, b=140),
+        xaxis_title=" / ".join(label.replace("_", " ").title() for label in label_cols),
+        yaxis_title="Pass Rate (%)",
+        legend_title_text=color_col.replace("_", " ").title(),
+    )
+    fig.update_traces(textposition="outside", cliponaxis=False)
+    fig.update_xaxes(tickangle=-35, automargin=True)
+    fig.update_yaxes(range=[0, 100], automargin=True)
+    return fig
+
+
+def _build_defect_case_bars(
+    frame: pd.DataFrame,
+    *,
+    ordered_mid_categories: list[str],
+    max_cases: int = 20,
+) -> go.Figure:
+    case_bars = frame.copy()
+    case_bars["minor_category"] = case_bars["minor_category"].fillna(case_bars["case_name"])
+    case_bars["mid_order"] = case_bars["mid_category"].map(
+        {mid_category: idx for idx, mid_category in enumerate(ordered_mid_categories)}
+    )
+    case_bars = case_bars.sort_values(["mid_order", "pass_rate", "total"], ascending=[True, True, False])
+    case_bars = case_bars.head(max_cases)
+    fig = px.bar(
+        case_bars,
+        x="minor_category",
+        y="pass_rate",
+        color="mid_category",
+        hover_data=["major_category", "mid_category", "passed", "total"],
+        text=case_bars["pass_rate"].map(lambda value: f"{value:.1f}%" if pd.notna(value) else "n/a"),
+        title="Case Pass Rates",
+    )
+    fig.update_layout(
+        height=500,
+        margin=dict(l=20, r=20, t=70, b=140),
+        xaxis_title="Case",
+        yaxis_title="Pass Rate (%)",
+        legend_title_text="Mid Category",
+    )
+    fig.update_traces(textposition="outside", cliponaxis=False)
+    fig.update_xaxes(tickangle=-35, automargin=True, categoryorder="array", categoryarray=case_bars["minor_category"].tolist())
+    fig.update_yaxes(range=[0, 100], automargin=True)
+    return fig
+
+
+def _build_metric_timeline_heatmap(
+    frame: pd.DataFrame,
+    *,
+    value_col: str,
+    title: str,
+    color_title: str,
+) -> go.Figure:
+    matrix = frame.pivot_table(
+        index="label_name",
+        columns="release_axis",
+        values=value_col,
+        aggfunc="first",
+    ).dropna(how="all")
+    fig = px.imshow(
+        matrix,
+        aspect="auto",
+        color_continuous_scale=["#7f1d1d", "#f8fafc", "#14532d"] if "delta" in value_col else ["#f8fafc", "#8dd3c7", "#0f766e"],
+        color_continuous_midpoint=0 if "delta" in value_col else None,
+        text_auto=".3f",
+    )
+    fig.update_layout(
+        title=title,
+        margin=dict(l=20, r=20, t=70, b=20),
+        coloraxis_colorbar=dict(title=color_title),
+    )
+    fig.update_xaxes(tickangle=-30, automargin=True)
+    fig.update_yaxes(automargin=True)
+    return fig
+
+
+def _build_metric_label_lines(
+    frame: pd.DataFrame,
+    *,
+    title: str,
+    ordered_axes: list[str],
+) -> go.Figure:
+    plot_df = frame.dropna(subset=["value"]).copy()
+    axis_order = {axis: idx for idx, axis in enumerate(ordered_axes)}
+    plot_df["__axis_order"] = plot_df["release_axis"].map(axis_order).fillna(len(axis_order))
+    plot_df = plot_df.sort_values(["label_name", "__axis_order", "release_axis"])
+    fig = px.line(
+        plot_df,
+        x="release_axis",
+        y="value",
+        color="label_name",
+        markers=True,
+        hover_data=["version", "date", "release_name"],
+        title=title,
+    )
+    fig.update_layout(margin=dict(l=20, r=20, t=70, b=20), legend_title_text="Label")
+    fig.update_xaxes(categoryorder="array", categoryarray=ordered_axes, tickangle=-30, automargin=True)
+    fig.update_traces(connectgaps=True)
+    return fig
+
+
+def _horizon_metric_sort_key(metric_name: str) -> tuple[float, str]:
+    horizon_text = str(metric_name).rsplit("@", 1)[-1].removesuffix("s")
+    try:
+        return float(horizon_text), str(metric_name)
+    except ValueError:
+        return float("inf"), str(metric_name)
+
+
+def _horizon_metric_label(metric_name: str) -> str:
+    return str(metric_name).rsplit("@", 1)[-1] if "@" in str(metric_name) else str(metric_name)
+
+
+def _available_prediction_metric_groups(frame: pd.DataFrame) -> dict[str, tuple[str, ...]]:
+    groups: dict[str, tuple[str, ...]] = {}
+    metric_series = frame["metric_name"].dropna().astype(str)
+    for metric_family in ("minADE", "minFDE"):
+        metric_names = sorted(
+            metric_series[metric_series.str.startswith(f"{metric_family}@")].unique().tolist(),
+            key=_horizon_metric_sort_key,
+        )
+        if metric_names:
+            groups[metric_family] = tuple(metric_names)
+    return groups
+
+
+def _build_prediction_label_profile(
+    frame: pd.DataFrame,
+    *,
+    selected_label: str,
+    metric_family: str,
+    metric_names: tuple[str, ...],
+    ordered_axes: list[str],
+) -> go.Figure:
+    profile_df = frame[
+        (frame["metric_name"].isin(metric_names))
+        & (frame["label_name"] == selected_label)
+    ].dropna(subset=["value"]).copy()
+    axis_order = {axis: idx for idx, axis in enumerate(ordered_axes)}
+    profile_df["__axis_order"] = profile_df["release_axis"].map(axis_order).fillna(len(axis_order))
+    profile_df = profile_df.sort_values(["metric_name", "__axis_order", "release_axis"])
+    fig = px.line(
+        profile_df,
+        x="release_axis",
+        y="value",
+        color="metric_name",
+        markers=True,
+        hover_data=["version", "date", "release_name"],
+        title=f"{selected_label} {metric_family} Horizon Profile",
+    )
+    fig.update_layout(margin=dict(l=20, r=20, t=70, b=20), legend_title_text="Horizon")
+    fig.update_xaxes(categoryorder="array", categoryarray=ordered_axes, tickangle=-30, automargin=True)
+    fig.update_traces(connectgaps=True)
+    return fig
+
+
+def _build_prediction_release_label_profile(
+    frame: pd.DataFrame,
+    *,
+    metric_family: str,
+    selected_release_axis: str,
+    selected_labels: list[str],
+    metric_names: tuple[str, ...],
+) -> go.Figure | None:
+    release_df = frame[
+        (frame["release_axis"] == selected_release_axis)
+        & (frame["label_name"].isin(selected_labels))
+        & (frame["metric_name"].isin(metric_names))
+    ].copy()
+    if release_df.empty:
+        return None
+
+    release_df["horizon"] = release_df["metric_name"].map(_horizon_metric_label)
+    release_df["horizon_sort"] = release_df["metric_name"].map(lambda name: _horizon_metric_sort_key(str(name))[0])
+    release_df = release_df.sort_values(["label_name", "horizon_sort"])
+    fig = px.line(
+        release_df,
+        x="horizon",
+        y="value",
+        color="label_name",
+        markers=True,
+        category_orders={"horizon": [_horizon_metric_label(metric_name) for metric_name in metric_names]},
+        hover_data=["version", "date", "release_name"],
+        title=f"{metric_family} by Label and Horizon",
+    )
+    fig.update_layout(
+        height=460,
+        margin=dict(l=20, r=20, t=70, b=30),
+        legend_title_text="Label",
+        xaxis_title="Prediction Horizon",
+        yaxis_title=f"{metric_family} (m)",
+        plot_bgcolor="#ffffff",
+        paper_bgcolor="#ffffff",
+    )
+    fig.update_xaxes(showgrid=False)
+    fig.update_yaxes(gridcolor="rgba(148, 163, 184, 0.18)")
+    return fig
+
+
+def _build_release_frames(groups: list[TrendReleaseGroup]) -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
+    release_rows: list[dict[str, Any]] = []
+    case_rows: list[dict[str, Any]] = []
+    metric_rows: list[dict[str, Any]] = []
+
+    for group in groups:
+        primary_metadata = _select_primary_metadata(group)
+        version = str(primary_metadata.get("pilot_auto_version") or "")
+        date = str(primary_metadata.get("date") or "")
+        description = str(primary_metadata.get("description") or "")
+        data_count = str(primary_metadata.get("data_count") or "")
+        release_row = {
+            "group_key": group.group_key,
+            "release_name": group.display_name,
+            "topic_name": group.topic_name,
+            "group_kind": group.group_kind,
+            "version": version,
+            "date": date,
+            "description": description,
+            "data_count": data_count,
+            "data_count_num": _parse_data_count(data_count),
+            "full_job_id": group.jobs.get("full", {}).get("job_id"),
+            "usecase_job_id": group.jobs.get("usecase", {}).get("job_id"),
+            "devops_job_id": group.jobs.get("devops", {}).get("job_id"),
+            "mAP": None,
+            "precision": None,
+            "recall": None,
+            "FNR": None,
+            "x_error": None,
+            "y_error": None,
+            "yaw_error": None,
+            "speed_error": None,
+            "minADE@1s": None,
+            "minADE@3s": None,
+            "minADE@5s": None,
+            "minFDE@1s": None,
+            "minFDE@3s": None,
+            "minFDE@5s": None,
+            "overall_pass_rate": None,
+            "scenario_count": None,
+            "role_count": len(group.jobs),
+            "roles": ", ".join(sorted(group.jobs.keys())),
+        }
+
+        if "full" in group.jobs:
+            full_summary = group.jobs["full"]["summary"]
+            release_row.update(extract_performance_metrics_from_summary(full_summary))
+            for block in full_summary.get("blocks", []):
+                block_header = str(block.get("header") or "")
+                for table in block.get("tables", []):
+                    table_data = table.get("data", {})
+                    if not isinstance(table_data, dict):
+                        continue
+                    for metric_name, labels in table_data.items():
+                        if not isinstance(labels, dict):
+                            continue
+                        for label_name, value in labels.items():
+                            metric_rows.append(
+                                {
+                                    "group_key": group.group_key,
+                                    "release_name": group.display_name,
+                                    "version": version,
+                                    "date": date,
+                                    "description": description,
+                                    "block_header": block_header,
+                                    "metric_name": metric_name,
+                                    "label_name": label_name,
+                                    "value": pd.to_numeric(value, errors="coerce"),
+                                }
+                            )
+
+        if "devops" in group.jobs:
+            flattened = extract_devops_case_rows(group.jobs["devops"]["summary"])
+            if flattened:
+                total_passed = sum(int(row["passed"]) for row in flattened)
+                total_count = sum(int(row["total"]) for row in flattened)
+                release_row["scenario_count"] = total_count
+                release_row["overall_pass_rate"] = (total_passed / total_count * 100.0) if total_count > 0 else None
+                for row in flattened:
+                    case_rows.append(
+                        {
+                            "group_key": group.group_key,
+                            "release_name": group.display_name,
+                            "version": version,
+                            "date": date,
+                            "description": description,
+                            **row,
+                        }
+                    )
+
+        release_rows.append(release_row)
+
+    release_df = pd.DataFrame(release_rows)
+    if not release_df.empty:
+        release_df["date_sort"] = pd.to_datetime(release_df["date"], format="%Y.%m.%d", errors="coerce")
+        release_df["release_display"] = release_df.apply(
+            lambda row: _release_display_name(row["version"], row["date"], row["description"]),
+            axis=1,
+        )
+    case_df = pd.DataFrame(case_rows)
+    if not case_df.empty:
+        case_df["date_sort"] = pd.to_datetime(case_df["date"], format="%Y.%m.%d", errors="coerce")
+        case_df["release_display"] = case_df.apply(
+            lambda row: _release_display_name(row["version"], row["date"], row["description"]),
+            axis=1,
+        )
+    metric_df = pd.DataFrame(metric_rows)
+    if not metric_df.empty:
+        metric_df["date_sort"] = pd.to_datetime(metric_df["date"], format="%Y.%m.%d", errors="coerce")
+        metric_df["release_display"] = metric_df.apply(
+            lambda row: _release_display_name(row["version"], row["date"], row["description"]),
+            axis=1,
+        )
+    return release_df, case_df, metric_df
+
+
+# --- Page entry point: header, data discovery and frame construction ---------
+render_page_hero(
+    kicker="Release Analytics",
+    title="Trend Insights",
+    description="Release history and performance trends.",
+)
+
+groups = discover_trend_release_groups()
+if not groups:
+    # Nothing to chart yet: show only the builder and end this script run here.
+    st.info("No saved trend metadata was found yet. Use the release trend builder below after the three job summaries are available.")
+    _render_release_trend_builder()
+    st.stop()
+
+# Page-level boundary: any failure while flattening the groups is reported to
+# the user and the rest of the page is not rendered.
+try:
+    release_df, case_df, metric_df = _build_release_frames(groups)
+except Exception as exc:
+    st.error(f"Could not build trend insights: {exc}")
+    st.stop()
+
+# "topic_family" is derived from the topic name; it only exists when there are rows.
+if not release_df.empty:
+    release_df["topic_family"] = release_df["topic_name"].map(_topic_family)
+
+section_header("Release History")
+release_specsheets = discover_release_specsheet_inventory(get_data_root())
+if release_specsheets:
+    release_specsheets = sorted(
+        release_specsheets,
+        key=lambda row: (
+            pd.to_datetime(row.get("date"), format="%Y.%m.%d", errors="coerce").timestamp()
+            if pd.notna(pd.to_datetime(row.get("date"), format="%Y.%m.%d", errors="coerce"))
+            else -1.0,
+            str(row.get("version") or ""),
+            str(row.get("release") or ""),
+        ),
+        reverse=True,
+    )
+    _render_release_library_table(release_specsheets)
+else:
+    st.info("No imported release library was found. Run `python scripts/import_catalog_analyzer_releases.py --force` to import analyzer output.")
+
+section_header("Release Performance")
+top1, top2, top3, top4, top5 = st.columns(5)
+top1.metric("Performance Groups", f"{len(release_df):,}")
+top2.metric("Unique Versions", f"{release_df['version'].nunique():,}" if not release_df.empty else "0")
+top3.metric("Perception Performance", f"{int((release_df['topic_family'] == 'Perception Performance').sum()):,}" if not release_df.empty else "0")
+top4.metric("ML Model Performance", f"{int((release_df['topic_family'] == 'ML Model Performance').sum()):,}" if not release_df.empty else "0")
+top5.metric("Latest Date", release_df.sort_values("date_sort")["date"].iloc[-1] if not release_df.empty else "n/a")
+
+performance_table_mode = st.segmented_control(
+    "Table view",
+    options=["Dataframe", "Colored bars"],
+    default="Dataframe",
+    key="release_performance_table_mode",
+)
+
+st.markdown("#### Perception Performance")
+_release_performance_table(
+    release_df,
+    family="Perception Performance",
+    empty_message="No Perception Performance release rows are available.",
+    table_mode=performance_table_mode,
+)
+
+st.markdown("#### ML Model Performance")
+_release_performance_table(
+    release_df,
+    family="ML Model Performance",
+    empty_message="No ML Model Performance release rows are available.",
+    table_mode=performance_table_mode,
+)
+
+section_header("Major Performance Scores")
+
+# Releases that have a "full" job, ordered oldest-first for plotting.
+perf_entries = release_df[release_df["full_job_id"].notna()].sort_values(
+    ["date_sort", "version", "release_name"],
+    ascending=[True, True, True],
+)
+major_metric_cols = ["mAP", "precision", "recall"]
+prediction_cols = [
+    "minADE@1s",
+    "minADE@3s",
+    "minADE@5s",
+    "minFDE@1s",
+    "minFDE@3s",
+    "minFDE@5s",
+]
+if not perf_entries.empty and perf_entries[major_metric_cols].notna().any().any():
+    # Last row per topic family among rows that have at least one major metric.
+    # NOTE(review): rows with an unparseable date (NaT in "date_sort") sort last
+    # by default and would therefore be picked as "latest" - confirm intended.
+    latest_major_rows = (
+        perf_entries.dropna(subset=major_metric_cols, how="all")
+        .sort_values(["date_sort", "version", "release_name"])
+        .groupby("topic_family", dropna=False)
+        .tail(1)
+    )
+    # Cards 0-1: latest mAP per family; cards 2-3: latest recall per family.
+    metric_card_cols = st.columns(4)
+    for family, card_col in zip(("Perception Performance", "ML Model Performance"), metric_card_cols[:2]):
+        family_row = latest_major_rows[latest_major_rows["topic_family"] == family]
+        if family_row.empty:
+            card_col.metric(f"{family} mAP", "n/a")
+            continue
+        card_col.metric(
+            f"{family} mAP",
+            f"{family_row['mAP'].iloc[-1]:.3f}" if pd.notna(family_row["mAP"].iloc[-1]) else "n/a",
+        )
+    latest_perception_row = latest_major_rows[latest_major_rows["topic_family"] == "Perception Performance"]
+    latest_model_row = latest_major_rows[latest_major_rows["topic_family"] == "ML Model Performance"]
+    metric_card_cols[2].metric(
+        "Perception Recall",
+        f"{latest_perception_row['recall'].iloc[-1]:.3f}"
+        if not latest_perception_row.empty and pd.notna(latest_perception_row["recall"].iloc[-1])
+        else "n/a",
+    )
+    metric_card_cols[3].metric(
+        "ML Model Recall",
+        f"{latest_model_row['recall'].iloc[-1]:.3f}"
+        if not latest_model_row.empty and pd.notna(latest_model_row["recall"].iloc[-1])
+        else "n/a",
+    )
+    fig = go.Figure()
+    # Despite its name this holds data counts: the maximum "data_count_num" per
+    # version, taken from Perception Performance rows only, aligned to the
+    # version order of perf_entries.
+    scenario_totals = (
+        perf_entries[perf_entries["topic_family"] == "Perception Performance"]
+        .groupby("version", dropna=False)["data_count_num"]
+        .max()
+        .reindex(perf_entries["version"].drop_duplicates().tolist())
+    )
+    fig.add_bar(
+        x=scenario_totals.index.tolist(),
+        y=scenario_totals.tolist(),
+        name="Data Count",
+        marker_color="#f4a7a7",
+        opacity=0.28,
+        yaxis="y2",
+        hovertemplate="<b>%{x}</b><br>Data Count: %{y:,}<extra></extra>",
+    )
+    # Colour identifies the metric; dash style identifies the topic family.
+    metric_styles = {
+        "mAP": "#0f766e",
+        "precision": "#1d4ed8",
+        "recall": "#be123c",
+    }
+    family_dashes = {
+        "Perception Performance": "solid",
+        "ML Model Performance": "dot",
+    }
+    for family in ("Perception Performance", "ML Model Performance"):
+        family_df = perf_entries[perf_entries["topic_family"] == family].copy()
+        if family_df.empty:
+            continue
+        for metric_col in major_metric_cols:
+            # Skip releases without this metric so the line is not broken by NaN.
+            metric_df_for_line = family_df.dropna(subset=[metric_col])
+            if metric_df_for_line.empty:
+                continue
+            fig.add_trace(
+                go.Scatter(
+                    x=metric_df_for_line["version"],
+                    y=metric_df_for_line[metric_col],
+                    name=metric_col,
+                    legendgroup=family,
+                    legendgrouptitle_text=family,
+                    mode="lines+markers",
+                    line=dict(
+                        color=metric_styles[metric_col],
+                        width=3,
+                        dash=family_dashes.get(family, "solid"),
+                    ),
+                    marker=dict(size=7),
+                    # customdata order must match the indices used in the hover template.
+                    customdata=metric_df_for_line[["release_name", "date", "data_count", "topic_name"]].to_numpy(),
+                    hovertemplate=(
+                        "<b>%{x}</b><br>"
+                        + f"{family} {metric_col}"
+                        + ": %{y:.3f}<br>Release: %{customdata[0]}<br>Date: %{customdata[1]}<br>Data Count: %{customdata[2]}<br>Topic: %{customdata[3]}<extra></extra>"
+                    ),
+                )
+            )
+    fig.update_layout(
+        title="Major Performance Scores",
+        xaxis_title="Pilot.Auto Version",
+        yaxis_title="Score",
+        yaxis2=dict(title="Data Count", overlaying="y", side="right", showgrid=False),
+        height=520,
+        legend=dict(orientation="h", yanchor="top", y=-0.18, x=0, xanchor="left"),
+        legend_tracegroupgap=18,
+        margin=dict(l=20, r=20, t=80, b=125),
+    )
+    st.plotly_chart(fig, use_container_width=True)
+else:
+    st.info("No grouped major metric trend entries are available yet.")
+
+section_header("Prediction Trend")
+
+# Prediction metrics are charted for the Perception Performance family only.
+prediction_entries = perf_entries[perf_entries["topic_family"] == "Perception Performance"].copy()
+prediction_entries = prediction_entries.sort_values(["date_sort", "version", "release_name"], ascending=[True, True, True])
+
+if not prediction_entries.empty and prediction_entries[prediction_cols].notna().any().any():
+    pred_card_col1, pred_card_col2, pred_card_col3 = st.columns(3)
+    # Last row (in the sort order above) that has at least one prediction metric.
+    # NOTE(review): rows with NaT in "date_sort" sort last by default and would
+    # be treated as the latest entry - confirm intended.
+    latest_pred_row = prediction_entries.dropna(subset=prediction_cols, how="all").iloc[-1]
+    # Card values are the mean over the 1s/3s/5s horizons of that row (NaN skipped).
+    latest_minade_mean = pd.to_numeric(latest_pred_row[["minADE@1s", "minADE@3s", "minADE@5s"]], errors="coerce").mean()
+    latest_minfde_mean = pd.to_numeric(latest_pred_row[["minFDE@1s", "minFDE@3s", "minFDE@5s"]], errors="coerce").mean()
+    pred_card_col1.metric(
+        "Mean minADE",
+        f"{latest_minade_mean:.2f} m" if pd.notna(latest_minade_mean) else "n/a",
+    )
+    pred_card_col2.metric(
+        "Mean minFDE",
+        f"{latest_minfde_mean:.2f} m" if pd.notna(latest_minfde_mean) else "n/a",
+    )
+    pred_card_col3.metric(
+        "Latest Data Count",
+        f"{int(latest_pred_row['data_count_num']):,}" if pd.notna(latest_pred_row["data_count_num"]) else "n/a",
+    )
+    pred_story = prediction_entries[
+        ["version", "date", "description", "release_name", "data_count", "data_count_num"] + prediction_cols
+    ].copy()
+    pred_fig = go.Figure()
+    # Background bars: data count per entry on the secondary axis.
+    pred_fig.add_bar(
+        x=pred_story["version"],
+        y=pred_story["data_count_num"],
+        name="Data Count",
+        marker_color="#fbbf24",
+        opacity=0.20,
+        yaxis="y2",
+        hovertemplate="<b>%{x}</b><br>Data Count: %{y:,}<extra></extra>",
+    )
+    # (metric column, line colour, dash): minADE lines are solid, minFDE lines dotted.
+    series_specs = [
+        ("minADE@1s", "#0f766e", "solid"),
+        ("minADE@3s", "#14b8a6", "solid"),
+        ("minADE@5s", "#99f6e4", "solid"),
+        ("minFDE@1s", "#1d4ed8", "dot"),
+        ("minFDE@3s", "#60a5fa", "dot"),
+        ("minFDE@5s", "#bfdbfe", "dot"),
+    ]
+    for metric_name, color, dash in series_specs:
+        # Drop entries without this metric; skip the trace entirely if none remain.
+        metric_story = pred_story.dropna(subset=[metric_name])
+        if metric_story.empty:
+            continue
+        pred_fig.add_trace(
+            go.Scatter(
+                x=metric_story["version"],
+                y=metric_story[metric_name],
+                name=metric_name,
+                mode="lines+markers",
+                # The 3s horizon is drawn thicker than the other horizons.
+                line=dict(color=color, width=3 if metric_name.endswith("@3s") else 2, dash=dash),
+                marker=dict(size=8),
+                # customdata order must match the indices used in the hover template.
+                customdata=metric_story[["date", "release_name", "data_count"]].to_numpy(),
+                hovertemplate=(
+                    "<b>%{x}</b><br>"
+                    + metric_name
+                    + ": %{y:.2f} m<br>Date: %{customdata[0]}<br>Release: %{customdata[1]}<br>Data Count: %{customdata[2]}<extra></extra>"
+                ),
+            )
+        )
+    pred_fig.update_layout(
+        title="Prediction Error Trend",
+        xaxis_title="Pilot.Auto Version",
+        yaxis_title="Prediction Error (m)",
+        yaxis2=dict(title="Data Count", overlaying="y", side="right", showgrid=False),
+        height=480,
+        legend=dict(orientation="h", yanchor="top", y=-0.18, x=0, xanchor="left"),
+        margin=dict(l=20, r=20, t=80, b=105),
+        plot_bgcolor="#ffffff",
+        paper_bgcolor="#ffffff",
+    )
+    pred_fig.update_xaxes(showgrid=False)
+    pred_fig.update_yaxes(gridcolor="rgba(148, 163, 184, 0.18)")
+    st.plotly_chart(pred_fig, use_container_width=True)
+else:
+    st.info("No usable grouped prediction trend values are available yet.")
+
+atlas_df = pd.DataFrame()  # empty defaults: used as-is when metric_df has no rows
+release_manifest = pd.DataFrame()
+ordered_release_axes: list[str] = []
+# Build the metric-atlas inputs from rows whose block_header is "全数データセット評価" (full-dataset evaluation).
+if not metric_df.empty:
+    atlas_df = metric_df[metric_df["block_header"] == "全数データセット評価"].copy()
+    atlas_df = atlas_df.sort_values(["date_sort", "version", "release_name"], ascending=[True, True, True])
+    atlas_df["release_axis"] = atlas_df["version"].astype(str) + " | " + atlas_df["date"].astype(str)  # "<version> | <date>" label
+    release_manifest = (
+        atlas_df[["group_key", "release_axis", "version", "date", "release_name", "release_display"]]
+        .drop_duplicates()
+        .reset_index(drop=True)
+    )  # one row per distinct release combination, kept in the sorted order above
+    ordered_release_axes = release_manifest["release_axis"].tolist()
+
+section_header("Pass Rate Trend")
+# Releases that have a devops_job_id, sorted ascending by date_sort, version and release_name.
+pass_entries = release_df[release_df["devops_job_id"].notna()].sort_values(
+    ["date_sort", "version", "release_name"],
+    ascending=[True, True, True],
+)
+if not pass_entries.empty:
+    pass_entries = pass_entries.copy()  # copy before adding a column to the filtered frame
+    pass_entries["pass_axis"] = pass_entries["version"].astype(str) + " | " + pass_entries["date"].astype(str)
+ordered_versions = pass_entries["pass_axis"].drop_duplicates().tolist() if not pass_entries.empty else []
+overall_plot_df = pd.DataFrame()  # empty defaults; filled below when data exists
+major_summary = pd.DataFrame()
+mid_summary = pd.DataFrame()
+# Overall series: only built when at least one release has a non-null overall_pass_rate.
+if not pass_entries.empty and pass_entries["overall_pass_rate"].notna().any():
+    overall_plot_df = pass_entries[
+        ["pass_axis", "date", "release_name", "overall_pass_rate", "scenario_count"]
+    ].rename(columns={"overall_pass_rate": "pass_rate", "scenario_count": "total"}).copy()
+    overall_plot_df = overall_plot_df.rename(columns={"pass_axis": "version"})  # same "version" column name as major_summary / mid_summary
+
+if not case_df.empty:  # aggregate passed/total per release label and category
+    case_for_pass = case_df.copy()
+    case_for_pass["pass_axis"] = case_for_pass["version"].astype(str) + " | " + case_for_pass["date"].astype(str)  # same label format as pass_entries
+    major_summary = (
+        case_for_pass.groupby(["pass_axis", "date", "release_name", "major_category"], dropna=False)[["passed", "total"]]
+        .sum()
+        .reset_index()
+        .rename(columns={"pass_axis": "version"})
+    )
+    major_summary = _with_pass_rate(major_summary)  # helper defined elsewhere; presumably derives pass_rate from passed/total
+    # Same aggregation one level deeper: per major_category and mid_category.
+    mid_summary = (
+        case_for_pass.groupby(
+            ["pass_axis", "date", "release_name", "major_category", "mid_category"],
+            dropna=False,
+        )[["passed", "total"]]
+        .sum()
+        .reset_index()
+        .rename(columns={"pass_axis": "version"})
+    )
+    mid_summary = _with_pass_rate(mid_summary)
+
+if not overall_plot_df.empty:  # single overall series (series_col=None)
+    st.plotly_chart(
+        _build_pass_combo_chart(
+            overall_plot_df,
+            title="Overall Pass Rate",
+            versions=ordered_versions,
+            series_col=None,
+            hover_cols=["date", "release_name"],
+        ),
+        use_container_width=True,
+    )
+else:
+    st.info("No grouped pass-rate summaries are available yet.")
+# Per-major-category series over the same ordered release labels.
+if not major_summary.empty:
+    st.plotly_chart(
+        _build_pass_combo_chart(
+            major_summary,
+            title="Major Category Pass Rate",
+            versions=ordered_versions,
+            series_col="major_category",
+        ),
+        use_container_width=True,
+    )
+# Per-mid-category series, with the major_category column dropped before charting.
+if not mid_summary.empty:
+    mid_summary_all = mid_summary.drop(columns=["major_category"], errors="ignore")
+    st.plotly_chart(
+        _build_pass_combo_chart(
+            mid_summary_all,
+            title="Mid Category Pass Rate",
+            versions=ordered_versions,
+            series_col="mid_category",
+        ),
+        use_container_width=True,
+    )
+
+section_header("Defect Evaluation")
+# Per-release defect breakdown: pick a release, then show category pass rates in the chosen view.
+if not case_df.empty and not pass_entries.empty:
+    defect_release_options = pass_entries["release_display"].tolist()
+    selected_defect_release = st.selectbox(
+        "Version",
+        defect_release_options,
+        index=len(defect_release_options) - 1,  # default to the last entry of the ascending-sorted pass_entries
+        key="defect_evaluation_release",
+    )
+    selected_defect_row = pass_entries.iloc[defect_release_options.index(selected_defect_release)]  # positional lookup; first match wins if release_display values repeat
+    selected_defect_case_df = case_df[case_df["group_key"] == selected_defect_row["group_key"]].copy()
+    defect_category_cols = ["major_category", "mid_category", "minor_category"]
+    selected_major_mid = (
+        selected_defect_case_df.groupby(defect_category_cols, dropna=False)[["passed", "total"]]
+        .sum()
+        .reset_index()
+    )
+    selected_major_mid = _with_pass_rate(selected_major_mid)
+    if not selected_major_mid.empty:
+        latest_view_mode = st.radio(
+            "View",
+            ["Bars", "Treemap", "Icicle", "Sunburst"],
+            horizontal=True,
+        )
+        if latest_view_mode == "Bars":
+            mid_level = (
+                selected_defect_case_df.groupby(["major_category", "mid_category"], dropna=False)[["passed", "total"]]
+                .sum()
+                .reset_index()
+            )
+            mid_level = _with_pass_rate(mid_level)
+            mid_level = mid_level.sort_values(
+                ["major_category", "mid_category", "pass_rate", "total"],
+                ascending=[True, True, False, False],
+            )
+            ordered_mid_categories = mid_level["mid_category"].tolist()  # reused so the case bars follow the same mid-category order
+            st.plotly_chart(
+                _build_defect_hierarchy_bars(
+                    mid_level,
+                    category_cols=["major_category", "mid_category"],
+                    color_col="major_category",
+                    title="Major / Mid",
+                ),
+                use_container_width=True,
+            )
+            st.plotly_chart(
+                _build_defect_case_bars(
+                    selected_defect_case_df,
+                    ordered_mid_categories=ordered_mid_categories,
+                ),
+                use_container_width=True,
+            )
+        elif latest_view_mode == "Treemap":
+            latest_fig = px.treemap(
+                selected_major_mid,
+                path=defect_category_cols,
+                values="total",
+                color="pass_rate",
+                color_continuous_scale=["#7f1d1d", "#fef3c7", "#166534"],
+                range_color=(0, 100),
+            )
+            latest_fig.update_layout(margin=dict(l=20, r=20, t=70, b=20))
+            st.plotly_chart(latest_fig, use_container_width=True)
+        elif latest_view_mode == "Icicle":
+            latest_fig = px.icicle(
+                selected_major_mid,
+                path=defect_category_cols,
+                values="total",
+                color="pass_rate",
+                color_continuous_scale=["#7f1d1d", "#fef3c7", "#166534"],
+                range_color=(0, 100),
+            )
+            latest_fig.update_layout(margin=dict(l=20, r=20, t=70, b=20))
+            st.plotly_chart(latest_fig, use_container_width=True)
+        else:  # remaining option: "Sunburst"
+            latest_fig = px.sunburst(
+                selected_major_mid,
+                path=defect_category_cols,
+                values="total",
+                color="pass_rate",
+                color_continuous_scale=["#7f1d1d", "#fef3c7", "#166534"],
+                range_color=(0, 100),
+            )
+            latest_fig.update_layout(margin=dict(l=20, r=20, t=70, b=20))
+            st.plotly_chart(latest_fig, use_container_width=True)
+        # Per-case table, sorted by pass_rate ascending and then total descending.
+        case_pass_rate = selected_defect_case_df.copy()
+        case_pass_rate["case"] = case_pass_rate["minor_category"].fillna(case_pass_rate["case_name"])  # falls back to case_name when minor_category is missing
+        case_pass_rate = case_pass_rate.sort_values(["pass_rate", "total"], ascending=[True, False])
+        with st.expander("Case Pass Rates", expanded=False):
+            st.dataframe(
+                case_pass_rate[
+                    ["major_category", "mid_category", "case", "pass_rate", "passed", "total"]
+                ],
+                use_container_width=True,
+                hide_index=True,
+                column_config={
+                    "pass_rate": st.column_config.NumberColumn("pass_rate", format="%.1f%%"),
+                },
+            )
+    else:
+        st.info("No defect evaluation hierarchy is available yet.")
+else:
+    st.info("No defect evaluation summaries are available yet.")
+
+if not atlas_df.empty:  # release/trend detail sections need full-dataset atlas rows
+    release_options = release_manifest["release_axis"].tolist()
+    section_header("Release Details")
+    selected_detail_release = st.selectbox(
+        "Version",
+        release_options,
+        index=len(release_options) - 1,  # default to the last release in the sorted manifest
+        key="deep_dive_release_detail",
+    )
+    horizon_metric_groups = _available_prediction_metric_groups(atlas_df)  # helper defined elsewhere; indexed below by metric family name
+    available_horizon_families = [metric_family for metric_family in ("minADE", "minFDE") if metric_family in horizon_metric_groups]
+    horizon_labels = sorted(
+        atlas_df[
+            atlas_df["metric_name"].isin(
+                [metric_name for metric_names in horizon_metric_groups.values() for metric_name in metric_names]
+            )
+        ]["label_name"]
+        .dropna()
+        .astype(str)
+        .unique()
+        .tolist()
+    )
+    # Metric x label matrix for the selected release (first value per cell), dropping all-NaN metric rows.
+    selected_atlas_group_key = release_manifest.loc[
+        release_manifest["release_axis"] == selected_detail_release,
+        "group_key",
+    ].iloc[0]  # first matching group_key if release_axis labels repeat
+    latest_matrix = atlas_df[atlas_df["group_key"] == selected_atlas_group_key].pivot_table(
+        index="metric_name",
+        columns="label_name",
+        values="value",
+        aggfunc="first",
+    ).dropna(how="all")
+    if not latest_matrix.empty:
+        latest_min = latest_matrix.min(axis=1)
+        latest_range = (latest_matrix.max(axis=1) - latest_min).replace(0, 1)  # a zero range becomes 1 to avoid dividing by zero
+        latest_norm = latest_matrix.sub(latest_min, axis=0).div(latest_range, axis=0)  # row-wise min-max normalisation
+        latest_atlas_fig = px.imshow(
+            latest_norm,
+            aspect="auto",
+            color_continuous_scale=["#f8fafc", "#8dd3c7", "#0f766e"],
+            text_auto=".2f",
+        )
+        latest_atlas_fig.update_traces(
+            text=latest_matrix.round(2).astype(str),  # NOTE(review): text_auto may still render the normalised z in cells; raw values are shown via hover — confirm
+            hovertemplate="Metric: %{y}<br>Label: %{x}<br>Value: %{text}<extra></extra>",
+        )
+        latest_atlas_fig.update_layout(
+            title="Metric Atlas",
+            margin=dict(l=20, r=20, t=70, b=20),
+            coloraxis_colorbar=dict(title="Relative"),
+        )
+        latest_atlas_fig.update_xaxes(automargin=True)
+        latest_atlas_fig.update_yaxes(automargin=True)
+        st.plotly_chart(latest_atlas_fig, use_container_width=True)
+    else:
+        st.info("No metric atlas is available for the selected release yet.")
+    # One Streamlit column per available metric family (minADE / minFDE) for the selected release.
+    if available_horizon_families and horizon_labels:
+        release_detail_cols = st.columns(len(available_horizon_families))
+        for col, metric_family in zip(release_detail_cols, available_horizon_families):
+            metric_names = horizon_metric_groups[metric_family]
+            family_df = atlas_df[atlas_df["metric_name"].isin(metric_names)].copy()
+            release_fig = _build_prediction_release_label_profile(
+                family_df,
+                metric_family=metric_family,
+                selected_release_axis=selected_detail_release,
+                selected_labels=horizon_labels,
+                metric_names=metric_names,
+            )
+            with col:
+                if release_fig is not None:  # a None figure is reported as "no values" for this family
+                    st.plotly_chart(release_fig, use_container_width=True)
+                else:
+                    st.info(f"No {metric_family} horizon values are available for the selected release.")
+
+    section_header("Trend Details")
+    if available_horizon_families and horizon_labels:
+        selected_horizon_label = st.selectbox(
+            "Label Trend Focus",
+            horizon_labels,
+            key="prediction_horizon_label_focus",
+        )
+        trend_profile_cols = st.columns(len(available_horizon_families))
+        for col, metric_family in zip(trend_profile_cols, available_horizon_families):
+            metric_names = horizon_metric_groups[metric_family]
+            family_df = atlas_df[atlas_df["metric_name"].isin(metric_names)].copy()
+            profile_fig = _build_prediction_label_profile(
+                family_df,
+                selected_label=selected_horizon_label,
+                metric_family=metric_family,
+                metric_names=metric_names,
+                ordered_axes=ordered_release_axes,
+            )
+            with col:
+                st.plotly_chart(profile_fig, use_container_width=True)
+    else:
+        st.info("No minADE/minFDE horizon trend data is available yet.")
+    # Single-metric explorer: timeline heatmap or per-label lines for one chosen metric.
+    trend_mode = st.radio(
+        "Trend View",
+        ["Timeline Heatmap", "Label Trend Lines"],
+        horizontal=True,
+        key="detailed_metric_trend_view",
+    )
+
+    metric_options = sorted(atlas_df["metric_name"].dropna().unique().tolist())
+    selected_metric = st.selectbox("Metric", metric_options, key="detailed_metric_trend_metric")
+
+    metric_trend_df = atlas_df[atlas_df["metric_name"] == selected_metric].copy()
+    if not metric_trend_df.empty:
+        if trend_mode == "Timeline Heatmap":
+            explorer_fig = _build_metric_timeline_heatmap(
+                metric_trend_df,
+                value_col="value",
+                title=f"{selected_metric} Timeline Heatmap by Label",
+                color_title=selected_metric,
+            )
+        else:  # "Label Trend Lines"
+            explorer_fig = _build_metric_label_lines(
+                metric_trend_df,
+                title=f"{selected_metric} Label Trend Lines",
+                ordered_axes=ordered_release_axes,
+            )
+        st.plotly_chart(explorer_fig, use_container_width=True)
+    else:
+        st.info("No detailed trend data is available for the selected metric yet.")
+elif not metric_df.empty:  # metrics exist, but none belong to the full-dataset block
+    st.info("No full-dataset metric atlas data is available yet.")
+
+if not case_df.empty:  # cascading filters: each selectbox lists the values left after the previous filter
+    with st.expander("Case Explorer", expanded=False):
+        filter_col1, filter_col2, filter_col3, filter_col4 = st.columns(4)
+        with filter_col1:
+            selected_major = st.selectbox("Major Category", ["All"] + sorted(case_df["major_category"].dropna().unique().tolist()))
+        case_filtered = case_df.copy()
+        if selected_major != "All":  # "All" leaves this level unfiltered
+            case_filtered = case_filtered[case_filtered["major_category"] == selected_major]
+        with filter_col2:
+            selected_mid = st.selectbox("Mid Category", ["All"] + sorted(case_filtered["mid_category"].dropna().unique().tolist()))
+        if selected_mid != "All":
+            case_filtered = case_filtered[case_filtered["mid_category"] == selected_mid]
+        with filter_col3:
+            selected_minor = st.selectbox("Minor Category", ["All"] + sorted(case_filtered["minor_category"].dropna().unique().tolist()))
+        if selected_minor != "All":
+            case_filtered = case_filtered[case_filtered["minor_category"] == selected_minor]
+        with filter_col4:
+            selected_case = st.selectbox("Case", ["All"] + sorted(case_filtered["case_name"].dropna().unique().tolist()))
+        if selected_case != "All":
+            case_filtered = case_filtered[case_filtered["case_name"] == selected_case]
+        # Show the filtered rows sorted by date_sort, version and case_name; date_sort itself is dropped from the table.
+        st.dataframe(
+            case_filtered.sort_values(["date_sort", "version", "case_name"]).drop(columns=["date_sort"], errors="ignore"),
+            use_container_width=True,
+            hide_index=True,
+        )
+
+with st.expander("Grouped Raw Browser", expanded=False):  # inspect one release group's manifest and raw child summary
+    selection_df = release_df.sort_values(
+        ["date_sort", "version", "release_name"],
+        ascending=[False, False, False],
+    ).reset_index(drop=True)  # descending order, re-indexed so positions match selection_labels
+    selection_labels = [
+        f"{row.release_display} | roles: {row.roles}"
+        for row in selection_df.itertuples()
+    ]
+    selected_label = st.selectbox("Release Group", selection_labels)
+    selected_release = selection_df.iloc[selection_labels.index(selected_label)]  # map the chosen label back to its row by position
+    selected_group = next(group for group in groups if group.group_key == selected_release["group_key"])  # raises StopIteration if no group matches
+    # JSON-friendly manifest of the selected group (path-like values are stringified).
+    group_manifest = {
+        "display_name": selected_group.display_name,
+        "topic_name": selected_group.topic_name,
+        "group_kind": selected_group.group_kind,
+        "base_dir": str(selected_group.base_dir),
+        "jobs": {
+            role: {
+                "job_id": payload["job_id"],
+                "metadata_path": str(payload["metadata_path"]),
+                "summary_path": str(payload["summary_path"]),
+            }
+            for role, payload in selected_group.jobs.items()
+        },
+    }
+
+    detail_col1, detail_col2 = st.columns([0.9, 1.1])
+    with detail_col1:
+        st.markdown("**Release Group Manifest**")
+        st.code(json.dumps(group_manifest, ensure_ascii=False, indent=2), language="json")
+        role_choice = st.selectbox("Child Role", sorted(selected_group.jobs.keys()))
+
+    with detail_col2:
+        st.markdown("**Selected Child Summary JSON**")
+        st.code(
+            json.dumps(selected_group.jobs[role_choice]["summary"], ensure_ascii=False, indent=2)[:30000],  # cap the rendered JSON at 30,000 characters
+            language="json",
+        )
+
+_render_release_trend_builder()  # helper defined elsewhere in this file
+# Optional debug table, shown only when release_specsheets is truthy.
+if release_specsheets:
+    with st.expander("Debug release inventory paths", expanded=False):
+        st.dataframe(
+            pd.DataFrame(_release_inventory_debug_rows(release_specsheets)),
+            width="stretch",
+            hide_index=True,
+        )
diff --git a/evaluation_dashboard_app/pages/1_TP_Summary.py b/evaluation_dashboard_app/pages/1_TP_Summary.py
index c08e7a1..1425fca 100644
--- a/evaluation_dashboard_app/pages/1_TP_Summary.py
+++ b/evaluation_dashboard_app/pages/1_TP_Summary.py
@@ -2,15 +2,20 @@
 import plotly.express as px
 import pandas as pd
 from lib.path_utils import path_display
+from lib.overview_url_hydrate import try_hydrate_session_from_overview_query_params
 from lib.page_chrome import inject_app_page_styles, render_loaded_data_section, render_page_hero, section_header
 from lib.summary_compare import build_summary_delta
 
 st.set_page_config(layout="wide", page_title="TP Summary", page_icon="📈", initial_sidebar_state="expanded")
+try_hydrate_session_from_overview_query_params()
 inject_app_page_styles()
 
 # ========== Safety Check ==========
 if "runA" not in st.session_state:
-    st.warning("Please load data from the Overview page first.")
+    st.warning(
+        "Please load data from the Overview page first. "
+        "If you already did, open Overview once so the URL includes `run_a=...`, then return (multiple Streamlit replicas)."
+    )
     st.stop()
 
 mode = st.session_state.get("mode", "Single Run")
@@ -71,6 +76,18 @@
     mode=mode,
 )
 
+
+def _apply_compact_chart_layout(fig, *, height: int = 300) -> None:
+    """Apply the shared compact TP Summary look (theme, height, margins, colors, font) to ``fig`` in place."""
+    compact_margin = dict(t=48, b=40, l=48, r=18)
+    compact_font = dict(family="system-ui, sans-serif", size=12, color="#334155")
+    fig.update_layout(
+        template="plotly_white", height=height,
+        margin=compact_margin, font=compact_font,
+        paper_bgcolor="rgba(248,250,252,0.9)",
+        plot_bgcolor="rgba(255,255,255,0.95)",
+    )
+
 # ========== View Selector ==========
 st.sidebar.markdown("##### Scope")
 if mode == "Compare Mode" and all_runs and run_labels and delta_by_label:
@@ -120,6 +137,18 @@
 if tp_col not in df_active.columns:
     st.warning(f"Missing required column: {tp_col}")
     st.stop()
+
+if df_active.empty:
+    if use_delta:
+        _keys = "id and perception_label" if "perception_label" in df_active.columns else "id"
+        st.warning(
+            f"No delta rows: baseline and candidate share no common Summary keys ({_keys}). "
+            "Pick Baseline or Candidate in the sidebar, or load runs with overlapping rows."
+        )
+    else:
+        st.warning("The active Summary has no rows for this view.")
+    st.stop()
+
 tp_values = df_active[tp_col]
 tp_min_val = float(tp_values.min())
 tp_max_val = float(tp_values.max())
@@ -153,7 +182,7 @@
 # ========== Data Filtering ==========
 df_f = df_active[(df_active[tp_col] >= tp_min) & (df_active[tp_col] <= tp_max)].copy()
 
-if clip_vel:
+if clip_vel and not df_f.empty:
     vx_col = "vx_delta" if use_delta else "vx"
     vy_col = "vy_delta" if use_delta else "vy"
     for c in (vx_col, vy_col):
@@ -194,7 +223,8 @@
     section_header("Position RMS (X vs Y)", "Lateral vs longitudinal RMS error; color encodes TP or ΔTP.")
     # Always compare the two sources side by side (before and after/delta)
     if use_delta:
-        # Show both reference and target RMS comparisons for X and Y, as well as their deltas
+        # Show both reference and target RMS comparisons in a tighter 2-up row.
+        rms_left, rms_right = st.columns(2)
         fig_rms_x_compare = px.scatter(
             df_f,
             x="xrms_B",
@@ -208,11 +238,14 @@
                 "xrms_delta": "Δ X RMS",
                 "yrms_delta": "Δ Y RMS",
             },
-            title=f"Scatter: X RMS ({cand}) vs X RMS (A)",
+            title=f"X RMS · {cand} vs A",
             color_continuous_scale="Viridis",
         )
-        fig_rms_x_compare.update_traces(marker=dict(size=8, opacity=0.6))
-        st.plotly_chart(fig_rms_x_compare, width="stretch")
+        fig_rms_x_compare.update_traces(marker=dict(size=7, opacity=0.58))
+        _apply_compact_chart_layout(fig_rms_x_compare, height=290)
+        with rms_left:
+            st.plotly_chart(fig_rms_x_compare, width="stretch")
+
         fig_rms_y_compare = px.scatter(
             df_f,
             x="yrms_B",
@@ -226,11 +259,13 @@
                 "xrms_delta": "Δ X RMS",
                 "yrms_delta": "Δ Y RMS",
             },
-            title=f"Scatter: Y RMS ({cand}) vs Y RMS (A)",
+            title=f"Y RMS · {cand} vs A",
             color_continuous_scale="Viridis",
         )
-        fig_rms_y_compare.update_traces(marker=dict(size=8, opacity=0.6))
-        st.plotly_chart(fig_rms_y_compare, width="stretch")
+        fig_rms_y_compare.update_traces(marker=dict(size=7, opacity=0.58))
+        _apply_compact_chart_layout(fig_rms_y_compare, height=290)
+        with rms_right:
+            st.plotly_chart(fig_rms_y_compare, width="stretch")
     else:
         # Just show the submission's RMS (x/y) for standard analysis
         fig_rms = px.scatter(
@@ -246,13 +281,14 @@
             },
             color_continuous_scale="Viridis",
         )
-        fig_rms.update_traces(marker=dict(size=8, opacity=0.7))
+        fig_rms.update_traces(marker=dict(size=8, opacity=0.68))
+        _apply_compact_chart_layout(fig_rms, height=320)
         st.plotly_chart(fig_rms, width="stretch")
 
 with col2:
     section_header("Velocity (vx vs vy)", "Planar velocity colored by TP or ΔTP.")
 
-    def plot_velocity(df, vx, vy, vx_label, vy_label):
+    def plot_velocity(df, vx, vy, vx_label, vy_label, *, title: str):
         fig = px.scatter(
             df,
             x=vx,
@@ -265,18 +301,32 @@ def plot_velocity(df, vx, vy, vx_label, vy_label):
                 tp_col: "TP",
             },
             color_continuous_scale="Plasma",
-            title=f"{vx_label} vs {vy_label}",
+            title=title,
         )
-        st.plotly_chart(fig, width="stretch")
+        fig.update_traces(marker=dict(size=7, opacity=0.58))
+        _apply_compact_chart_layout(fig, height=290 if use_delta else 320)
+        return fig
 
     if use_delta:
-        plot_velocity(df_f, "vx", "vy", "Vx (A)", "Vy (A)")
-        plot_velocity(df_f, "vx_B", "vy_B", f"Vx ({cand})", f"Vy ({cand})")
+        vel_left, vel_right = st.columns(2)
+        with vel_left:
+            st.plotly_chart(
+                plot_velocity(df_f, "vx", "vy", "Vx (A)", "Vy (A)", title="Velocity · A"),
+                width="stretch",
+            )
+        with vel_right:
+            st.plotly_chart(
+                plot_velocity(df_f, "vx_B", "vy_B", f"Vx ({cand})", f"Vy ({cand})", title=f"Velocity · {cand}"),
+                width="stretch",
+            )
     else:
-        plot_velocity(df_f, "vx", "vy", "Vx", "Vy")
+        st.plotly_chart(
+            plot_velocity(df_f, "vx", "vy", "Vx", "Vy", title="Velocity"),
+            width="stretch",
+        )
 
 # ========== Metric Distribution ==========
-section_header("Metric distribution", "Histogram + marginal box for any Summary column or delta column.")
+section_header("Metric distribution", "Compact secondary views for a selected Summary metric.")
 metrics = ["xstd", "ystd", "xrms", "yrms", "vx", "vy", "TP"]
 metrics_delta = [f"{m}_delta" for m in metrics]
 metric_options = metrics_delta if use_delta else metrics
@@ -287,46 +337,42 @@ def plot_velocity(df, vx, vy, vx_label, vy_label):
     default_index = 0
 metric = st.selectbox("Select metric", metric_options, index=default_index)
 
-# Show a simple, single-color (monochrome) distribution for clarity
+dist_left, dist_right = st.columns(2)
+
 fig_hist = px.histogram(
     df_f,
     x=metric,
-    nbins=40,
+    nbins=36,
     color_discrete_sequence=["#0d9488"],
     marginal="box",
     opacity=0.88,
+    title=f"{metric} distribution",
 )
 fig_hist.update_layout(
-    template="plotly_white",
     showlegend=False,
     bargap=0.04,
     xaxis_title=metric,
     yaxis_title="Count",
-    paper_bgcolor="rgba(248,250,252,0.9)",
-    plot_bgcolor="rgba(255,255,255,0.95)",
-    font=dict(family="system-ui, sans-serif", size=12, color="#334155"),
-    margin=dict(t=36, b=48, l=56, r=28),
 )
-st.plotly_chart(fig_hist, width="stretch")
+_apply_compact_chart_layout(fig_hist, height=280)
+with dist_left:
+    st.plotly_chart(fig_hist, width="stretch")
 
-section_header("Density (violin)", "Shape of the selected metric including outliers.")
 fig_density = px.violin(
     df_f,
     y=metric,
     box=True,
-    points="all",
+    points="outliers",
     color_discrete_sequence=["#312e81"],
+    title=f"{metric} density",
 )
 fig_density.update_layout(
-    template="plotly_white",
     yaxis_title=metric,
     showlegend=False,
-    paper_bgcolor="rgba(248,250,252,0.9)",
-    plot_bgcolor="rgba(255,255,255,0.95)",
-    font=dict(family="system-ui, sans-serif", size=12, color="#334155"),
-    margin=dict(t=36, b=48, l=56, r=28),
 )
-st.plotly_chart(fig_density, width="stretch")
+_apply_compact_chart_layout(fig_density, height=280)
+with dist_right:
+    st.plotly_chart(fig_density, width="stretch")
 
 # ========== Scenario-level Delta Analysis (Compare Mode) ==========
 df_cmp = df_active if use_delta else None
diff --git a/evaluation_dashboard_app/pages/2_Criteria_Based_Score.py b/evaluation_dashboard_app/pages/2_Criteria_Based_Score.py
index 96d13e3..5229f46 100644
--- a/evaluation_dashboard_app/pages/2_Criteria_Based_Score.py
+++ b/evaluation_dashboard_app/pages/2_Criteria_Based_Score.py
@@ -4,6 +4,7 @@
 import plotly.express as px
 import plotly.graph_objects as go
 from lib.path_utils import path_display
+from lib.overview_url_hydrate import try_hydrate_session_from_overview_query_params
 from lib.page_chrome import (
     inject_app_page_styles,
     render_loaded_data_section,
@@ -24,7 +25,15 @@
     export_gate_result,
     failing_scenarios_table,
     gate_summary,
-    infer_criteria_count,
+)
+from lib.score_schema import (
+    SCORE_BLOCK_SIZE,
+    SCORE_NUM_COLS,
+    SCORE_VIEW_METRIC_COLS,
+    build_score_view,
+    infer_score_criteria_count,
+    score_base_cols,
+    score_identity_cols,
 )
 
 st.set_page_config(
@@ -33,7 +42,7 @@
     page_icon="📊",
     initial_sidebar_state="expanded",
 )
-
+try_hydrate_session_from_overview_query_params()
 
 # Plotly theme (multi-run palette aligned with Overview / run cards)
 _COMPARE_RUN_COLORS = ["#312e81", "#0f766e", "#e86a33", "#6b8e23", "#9b59b6", "#1abc9c"]
@@ -111,7 +120,15 @@ def _filter_df_view_by_perception_labels(
     allowed = set(s["id"].unique())
     if not allowed:
         return df_view.iloc[0:0].copy()
-    return df_view.loc[df_view["Scenario"].astype(str).isin(allowed)].copy()
+
+    scenario_key = df_view["Scenario"].astype(str)
+    mask = scenario_key.isin(allowed)
+    if "Dataset" in df_view.columns:
+        # Older generated Score.csv files stored the final scenario suffix in Dataset,
+        # while Summary.csv kept the full id. Keep matching those files too.
+        composite_key = scenario_key + "_" + df_view["Dataset"].astype(str)
+        mask = mask | composite_key.isin(allowed)
+    return df_view.loc[mask].copy()
 
 
 def _filter_df_view_by_scenarios(df_view: pd.DataFrame, selected_scenarios: list) -> pd.DataFrame:
@@ -143,7 +160,10 @@ def _apply_gate_data_filters(
 # Safety check
 # =========================
 if "runA" not in st.session_state:
-    st.warning("Please load data from the Overview page first.")
+    st.warning(
+        "Please load data from the Overview page first. "
+        "If you already did, open Overview once so the URL includes `run_a=...`, then return (multiple Streamlit replicas)."
+    )
     st.stop()
 
 mode = st.session_state.get("mode", "Single Run")
@@ -189,54 +209,14 @@ def _apply_gate_data_filters(
 # Constants
 # =========================
 
-BASE_COLS = ["Scenario", "Option", "GT_OBJ"]
-
-CRITERIA_COLS = [
-    "distance",
-    "nm",
-    "tp_tn",
-    "add",
-    "ail",
-    "uil",
-    "pfn_pfp",
-    "uuid_num",
-    "pass_rate",
-    "max_dist_thresh",
-    "obj_cnts",
-]
-
-BLOCK_COLS = [
-    "distance",
-    "nm",
-    "tp_tn",
-    "add",
-    "ail",
-    "uil",
-    "pfn_pfp",
-    "uuid_num",
-    "pass_rate",
-    "max_dist_thresh",
-    "obj_cnts",
-]
-
-BLOCK_SIZE = len(CRITERIA_COLS)
-
-NUM_COLS = [
-    "distance",
-    "nm",
-    "tp_tn",
-    "add",
-    "ail",
-    "uil",
-    "pfn_pfp",
-    "uuid_num",
-    "pass_rate",
-    "max_dist_thresh",
-]
-
-_criteria_n_a = infer_criteria_count(df_raw_A, BLOCK_SIZE)
+BASE_COLS = score_base_cols(df_raw_A)
+CRITERIA_COLS = SCORE_VIEW_METRIC_COLS
+BLOCK_SIZE = SCORE_BLOCK_SIZE
+NUM_COLS = SCORE_NUM_COLS
+
+_criteria_n_a = infer_score_criteria_count(df_raw_A)
 if mode == "Compare Mode" and compare_runs:
-    CRITERIA_COUNT = min(infer_criteria_count(r["score"], BLOCK_SIZE) for r in compare_runs)
+    CRITERIA_COUNT = min(infer_score_criteria_count(r["score"]) for r in compare_runs)
 else:
     CRITERIA_COUNT = _criteria_n_a
 
@@ -251,19 +231,16 @@ def _apply_gate_data_filters(
 
 
 def build_view(df_raw, criteria_idx):
-    start = 3 + criteria_idx * BLOCK_SIZE
-    end = start + BLOCK_SIZE
-
-    df_view = df_raw.iloc[:, :3].copy()
-    df_view.columns = BASE_COLS
+    return build_score_view(df_raw, criteria_idx)
 
-    block = df_raw.iloc[:, start:end].copy()
-    block.columns = BLOCK_COLS
 
-    df_view = pd.concat([df_view, block], axis=1)
-    for c in NUM_COLS:
-        df_view[c] = pd.to_numeric(df_view[c], errors="coerce")
-    return df_view
+def _add_scenario_display(df: pd.DataFrame) -> pd.DataFrame:
+    """Return a copy of ``df`` with a ``ScenarioDisplay`` column ("Scenario [Dataset]" when ``Dataset`` exists)."""
+    labelled = df.copy()
+    labelled["ScenarioDisplay"] = labelled["Scenario"].astype(str)
+    if "Dataset" in labelled.columns:
+        labelled["ScenarioDisplay"] += " [" + labelled["Dataset"].astype(str) + "]"
+    return labelled
 
 
 st.sidebar.divider()
@@ -453,13 +430,14 @@ def _gate_compare_overlap_stats(result_a: pd.DataFrame, result_b: pd.DataFrame)
     """Classify scenarios on inner join (same Scenario id in both gate tables)."""
     if result_a is None or result_b is None or result_a.empty or result_b.empty:
         return None
-    a = result_a[["Scenario", "scenario_pass"]].copy()
-    b = result_b[["Scenario", "scenario_pass"]].copy()
+    key_cols = [c for c in score_identity_cols(result_a) if c in result_b.columns]
+    a = result_a[key_cols + ["scenario_pass"]].copy()
+    b = result_b[key_cols + ["scenario_pass"]].copy()
     a["pass_a"] = a["scenario_pass"].map(bool)
     b["pass_b"] = b["scenario_pass"].map(bool)
     outer = a.drop(columns=["scenario_pass"]).merge(
         b.drop(columns=["scenario_pass"]),
-        on="Scenario",
+        on=key_cols,
         how="outer",
         indicator=True,
     )
@@ -500,7 +478,10 @@ def _overlap_scenario_lists(merged: pd.DataFrame) -> dict[str, list[str]]:
             "a_fail_b_pass": [],
             "a_pass_b_fail": [],
         }
-    scen = merged["Scenario"].astype(str)
+    if "Dataset" in merged.columns:
+        scen = merged["Scenario"].astype(str) + " [" + merged["Dataset"].astype(str) + "]"
+    else:
+        scen = merged["Scenario"].astype(str)
     pa = merged["pass_a"].map(bool)
     pb = merged["pass_b"].map(bool)
     return {
@@ -1085,14 +1066,16 @@ def _render_absolute_gates_section(
             "Per-scenario pass rate",
             "Scenarios present in every run (inner join) — filter to focus on regressions or wins.",
         )
+        scenario_key_cols = score_identity_cols(df_views[0])
         merges = []
         for i, lbl in enumerate(cl):
-            g = df_views[i].groupby("Scenario", as_index=False)["pass_rate"].mean()
+            g = df_views[i].groupby(scenario_key_cols, as_index=False)["pass_rate"].mean()
             g = g.rename(columns={"pass_rate": f"pr_{lbl}"})
             merges.append(g)
         per_scenario = merges[0]
         for g in merges[1:]:
-            per_scenario = per_scenario.merge(g, on="Scenario", how="inner")
+            per_scenario = per_scenario.merge(g, on=scenario_key_cols, how="inner")
+        per_scenario = _add_scenario_display(per_scenario)
         pr_base = f"pr_{cl[0]}"
         delta_col = f"delta_{focus_cand}"
         for lbl in cand_only:
@@ -1134,7 +1117,7 @@ def _render_absolute_gates_section(
         elif filter_method == "Custom contains string":
             search = st.text_input("Show scenarios with name containing (case-insensitive):", "")
             per_scenario_vis = (
-                per_scenario[per_scenario["Scenario"].str.contains(search, case=False, na=False)]
+                per_scenario[per_scenario["ScenarioDisplay"].str.contains(search, case=False, na=False)]
                 if search
                 else per_scenario
             )
@@ -1145,7 +1128,7 @@ def _render_absolute_gates_section(
         col_to_run = {f"pr_{lbl}": run_names[i] for i, lbl in enumerate(cl)}
         per_scenario_vis_long = pd.melt(
             per_scenario_vis,
-            id_vars=["Scenario"],
+            id_vars=scenario_key_cols + ["ScenarioDisplay"],
             value_vars=pr_cols_melt,
             var_name="_k",
             value_name="pass_rate",
@@ -1155,7 +1138,7 @@ def _render_absolute_gates_section(
 
         fig = px.bar(
             per_scenario_vis_long,
-            x="Scenario",
+            x="ScenarioDisplay",
             y="pass_rate",
             color="Run",
             color_discrete_map=_px_map,
@@ -1171,7 +1154,7 @@ def _render_absolute_gates_section(
         )
         fig2 = px.bar(
             per_scenario_vis.reindex(per_scenario_vis[delta_col].abs().sort_values(ascending=False).index),
-            x="Scenario",
+            x="ScenarioDisplay",
             y=delta_col,
             color=delta_col,
             color_continuous_scale="RdYlGn",
@@ -1180,7 +1163,7 @@ def _render_absolute_gates_section(
         _plotly_apply_theme(fig2, "Pass rate delta by scenario")
         st.plotly_chart(fig2, width="stretch")
 
-        table_cols = ["Scenario"] + pr_cols_melt + [f"delta_{lbl}" for lbl in cand_only]
+        table_cols = scenario_key_cols + pr_cols_melt + [f"delta_{lbl}" for lbl in cand_only]
         table_cols = [c for c in table_cols if c in per_scenario_vis.columns]
         with st.expander("Show Table: Per Scenario Pass Rates and Deltas"):
             st.dataframe(per_scenario_vis[table_cols], width="stretch")
@@ -1195,7 +1178,7 @@ def _render_absolute_gates_section(
                 per_scenario_vis,
                 x=pr_base,
                 y=f"pr_{focus_cand}",
-                text="Scenario",
+                text="ScenarioDisplay",
                 labels={
                     pr_base: f"Baseline ({cl[0]}) Pass Rate",
                     f"pr_{focus_cand}": f"Candidate ({focus_cand}) Pass Rate",
@@ -1376,7 +1359,8 @@ def _render_absolute_gates_section(
     st.plotly_chart(fig, width="stretch")
 
     section_header("Scenario leaderboard", "Mean pass rate per scenario — tune N and sort direction.")
-    scenario_metric = df_view.groupby("Scenario", as_index=False)["pass_rate"].mean()
+    scenario_key_cols = score_identity_cols(df_view)
+    scenario_metric = df_view.groupby(scenario_key_cols, as_index=False)["pass_rate"].mean()
     top_n = st.number_input("Top N scenarios", min_value=5, max_value=100, value=20, key="single_top_n")
     sort_order = st.radio("Order", ["Highest first", "Lowest first"], horizontal=True, key="single_scen_order")
     scenario_metric = scenario_metric.sort_values(
diff --git a/evaluation_dashboard_app/pages/3_Detection_Stats.py b/evaluation_dashboard_app/pages/3_Detection_Stats.py
index c10fcc7..480203a 100644
--- a/evaluation_dashboard_app/pages/3_Detection_Stats.py
+++ b/evaluation_dashboard_app/pages/3_Detection_Stats.py
@@ -1,5 +1,6 @@
 import html
 from contextlib import contextmanager
+import hashlib
 
 import duckdb
 import streamlit as st
@@ -12,6 +13,15 @@
 from typing import Optional, List, Tuple
 
 from lib.path_utils import path_display
+from lib.detection_stats_debug import (
+    ds_debug_init_session_state,
+    ds_debug_log_exception,
+    ds_debug_log_memory,
+    ds_debug_render_expander,
+    ds_dlog,
+    ds_dtimer,
+)
+from lib.overview_url_hydrate import try_hydrate_session_from_overview_query_params
 from lib.parquet_schema import schema_flags
 from lib.page_chrome import inject_app_page_styles, render_loaded_data_section, render_page_hero
 from lib.ui.detection_stats import (
@@ -73,6 +83,16 @@
 )
 
 
+def _banner_html_with_note(note: str) -> str:
+    """Return the loading-banner markup, appending the HTML-escaped ``note`` to its sub-line when non-empty."""
+    markup = detection_stats_page_loading_banner_markup()
+    if note:
+        sub_open = '<span class="ds-plb-sub">Hang tight — large Parquet files can take a moment.'
+        extended = f"{sub_open}<br>{html.escape(note)}</span>"
+        markup = markup.replace(f"{sub_open}</span>", extended)
+    return markup
+
+
 def apply_chart_theme(fig, **overrides):
     """Apply unified theme to a Plotly figure; overrides (e.g. height, margin) take precedence."""
     layout_update = {**PLOTLY_LAYOUT_THEME, **overrides}
@@ -281,11 +301,18 @@ def _scalar_metric_spider_compare(
     initial_sidebar_state="expanded",
 )
 
+try_hydrate_session_from_overview_query_params()
+ds_debug_init_session_state(st.session_state)
+
 # =============================
 # Session state from Overview (mode, run paths)
 # =============================
 if "runA" not in st.session_state:
-    st.warning("Please load data from the **Overview** page first (select mode and run(s)).")
+    st.warning(
+        "Please load data from the **Overview** page first (select mode and run(s)). "
+        "If you already did, open **Overview** once so the URL includes `run_a=...` (share link), then return — "
+        "or hard-refresh. With multiple Streamlit replicas, the server-side session may not follow until the URL is synced."
+    )
     st.stop()
 
 inject_app_page_styles()
@@ -316,16 +343,24 @@ def list_parquets_in_run(run_path) -> List[str]:
     return sorted([str(f.resolve()) for f in p.glob("*.parquet")])
 
 # =============================
-# DuckDB Connection
+# DuckDB Connection (one in-memory DB per Streamlit browser session)
 # =============================
-_duckdb_connection: Optional[duckdb.DuckDBPyConnection] = None
-
 def get_duckdb_connection() -> duckdb.DuckDBPyConnection:
-    """Return a shared DuckDB connection for all queries."""
-    global _duckdb_connection
-    if _duckdb_connection is None:
-        _duckdb_connection = duckdb.connect()
-    return _duckdb_connection
+    """Return a DuckDB connection scoped to this Streamlit session."""
+    if "_ds_duckdb" not in st.session_state:
+        st.session_state["_ds_duckdb"] = duckdb.connect()
+    return st.session_state["_ds_duckdb"]
+
+
+def _parquet_selection_fingerprint(paths: List[str]) -> Tuple[Tuple[str, float], ...]:
+    """Return (path, mtime) per file; a path whose mtime cannot be read (``OSError``) is recorded as 0.0."""
+    def _mtime_or_zero(p: str) -> float:
+        try:
+            return os.path.getmtime(p)
+        except OSError:
+            return 0.0
+
+    return tuple((p, _mtime_or_zero(p)) for p in paths)
 
 # =============================
 # Helper Functions
@@ -359,12 +394,27 @@ def list_values(con, pq: str, expr: str, where: Optional[str] = None) -> List:
         return []
     return df_.iloc[:, 0].dropna().tolist()
 
+
+def _is_detection_stats_eval_flat_cache(path: str) -> bool:
+    """Return True when the file name ends with the ``_eval_flat.parquet`` suffix used for materialized caches."""
+    return Path(path).name.endswith("_eval_flat.parquet")
+
+
 def create_view_eval_flat(con, target_file: str, view_name: str = "view_eval_flat"):
     """Create view_eval_flat with distance bins."""
-    query = f"""
-    CREATE OR REPLACE VIEW {view_name} AS
+    safe_target = target_file.replace("'", "''")
+    if _is_detection_stats_eval_flat_cache(target_file):
+        query = f"CREATE OR REPLACE VIEW {view_name} AS SELECT * FROM parquet_scan('{safe_target}')"
+    else:
+        query = f"CREATE OR REPLACE VIEW {view_name} AS {eval_flat_select_sql(target_file)}"
+    con.execute(query)
+
+
+def eval_flat_select_sql(target_file: str) -> str:
+    safe_target = target_file.replace("'", "''")
+    return f"""
     WITH src AS (
-        SELECT * FROM parquet_scan('{target_file}')
+        SELECT * FROM parquet_scan('{safe_target}')
         UNION BY NAME
         SELECT CAST(NULL AS VARCHAR) AS visibility,
                CAST(NULL AS VARCHAR) AS suite_name,
@@ -411,14 +461,46 @@ def create_view_eval_flat(con, target_file: str, view_name: str = "view_eval_fla
     JOIN bins b
         ON bse.dist_h >= b.bin_start AND bse.dist_h < b.bin_end
     """
-    con.execute(query)
 
-def create_view_tpr_fpr(con, view_name: str = "view_tpr_fpr_by_class_dist_topic", source_eval_flat: str = "view_eval_flat"):
-    """Create TPR/FPR view. source_eval_flat is the name of the eval_flat view to read from."""
-    query = f"""
-    CREATE OR REPLACE VIEW {view_name} AS
-    WITH stats AS (
-        SELECT
+
+def _ds_cache_dir_for_run(run_path: Path) -> Path:
+    return run_path / ".dashboard_cache" / "detection_stats_cache"
+
+
+def _ds_cache_key_for_source(source_path: str) -> str:
+    return hashlib.sha1(source_path.encode("utf-8")).hexdigest()[:12]
+
+
+def _ds_cache_path_for_source(run_path: Path, source_path: str) -> Path:
+    src = Path(source_path)
+    return _ds_cache_dir_for_run(run_path) / f"{src.stem}_{_ds_cache_key_for_source(source_path)}_eval_flat.parquet"
+
+
+def _ensure_detection_stats_eval_flat_cache(
+    con: duckdb.DuckDBPyConnection,
+    *,
+    run_path: Path,
+    source_path: str,
+) -> tuple[str, bool]:
+    """Ensure a materialized eval_flat parquet exists for this source parquet.
+
+    Rebuilds when the cache file is missing or older than the source (mtime comparison).
+    Returns (cached_parquet_path, rebuilt_flag).
+    """
+    cache_path = _ds_cache_path_for_source(run_path, source_path)
+    cache_path.parent.mkdir(parents=True, exist_ok=True)
+    source_mtime = Path(source_path).stat().st_mtime
+    needs_rebuild = not cache_path.exists() or cache_path.stat().st_mtime < source_mtime
+    if needs_rebuild:
+        # Write to a sibling temp file and rename, so an interrupted COPY never leaves a truncated cache.
+        tmp_path = cache_path.with_name(f"{cache_path.name}.{os.getpid()}.tmp")
+        safe_tmp = str(tmp_path).replace("'", "''")
+        con.execute(f"COPY ({eval_flat_select_sql(source_path)}) TO '{safe_tmp}' (FORMAT PARQUET)")
+        os.replace(tmp_path, cache_path)
+    return str(cache_path), needs_rebuild
+
+# Per-(dataset, topic, label, bin, visibility, suite) aggregates — shared by distance-bin rate queries.
+_TPR_FPR_STATS_SELECT = """SELECT
             t4dataset_id,
             topic_name,
             label,
@@ -429,55 +511,110 @@ def create_view_tpr_fpr(con, view_name: str = "view_tpr_fpr_by_class_dist_topic"
             COUNT(*) FILTER (WHERE source='GT' AND status IN ('TP','FN')) AS gt_total,
             COUNT(*) FILTER (WHERE source='GT' AND status='TP') AS tp_gt,
             COUNT(*) FILTER (WHERE source='EST' AND status IN ('TP','FP')) AS est_total,
-            COUNT(*) FILTER (WHERE source='EST' AND status='FP') AS fp_est
-        FROM {source_eval_flat}
-        GROUP BY
-            t4dataset_id, topic_name, label, distance_bin, bin_idx,
+            COUNT(*) FILTER (WHERE source='EST' AND status='FP') AS fp_est"""
+
+_TPR_FPR_STATS_GROUP_BY = """t4dataset_id, topic_name, label, distance_bin, bin_idx,
             coalesce(try(CAST(visibility AS VARCHAR)), 'not available'),
-            coalesce(try(CAST(suite_name AS VARCHAR)), '')
-    )
-    SELECT
-        *,
-        CASE WHEN gt_total > 0 THEN CAST(tp_gt AS DOUBLE) / gt_total ELSE NULL END AS tpr,
-        CASE WHEN est_total > 0 THEN CAST(fp_est AS DOUBLE) / est_total ELSE NULL END AS fpr
-    FROM stats
+            coalesce(try(CAST(suite_name AS VARCHAR)), '')"""
+
+
+def sql_distance_bin_rates_from_eval_flat(
+    source_eval_flat: str,
+    filter_clause: str,
+    *,
+    metrics: str = "both",
+) -> str:
+    """TPR/FPR by ``distance_bin`` from ``view_eval_flat`` rows, with filters pushed into the stats CTE.
+
+    Distance charts used to ``SELECT ... FROM view_tpr_fpr_* WHERE ...`` (nested view over parquet). On some
+    DuckDB builds that plan can **SIGSEGV** the process (container exit **139**). This query inlines the same
+    stats aggregation and applies ``WHERE`` on the flat view instead.
+
+    Args:
+        source_eval_flat: Name of the eval_flat view (or relation) to aggregate; inserted verbatim after FROM.
+        filter_clause: SQL boolean expression; inserted verbatim inside ``WHERE (...)``.
+        metrics: ``"both"`` (tpr and fpr columns), ``"tpr"`` or ``"fpr"`` (that single rate column).
+
+    Returns:
+        SQL text yielding one row per ``distance_bin``, ordered by the integer parsed from the first
+        comma-separated part of the bin label. A rate is 0 when its denominator sum is 0.
+
+    Raises:
+        ValueError: If ``metrics`` is not one of the three accepted values.
     """
-    con.execute(query)
+    # Validate up front so a bad ``metrics`` value fails before any SQL text is assembled.
+    if metrics not in ("both", "tpr", "fpr"):
+        raise ValueError(f"metrics must be 'both', 'tpr', or 'fpr', got {metrics!r}")
+
+    # One template per rate column; the three variants differ only in which of these they select.
+    rate_columns = {
+        "tpr": "CASE WHEN SUM(gt_total) > 0 THEN CAST(SUM(tp_gt) AS DOUBLE) / SUM(gt_total) ELSE 0 END AS tpr",
+        "fpr": "CASE WHEN SUM(est_total) > 0 THEN CAST(SUM(fp_est) AS DOUBLE) / SUM(est_total) ELSE 0 END AS fpr",
+    }
+    wanted = ("tpr", "fpr") if metrics == "both" else (metrics,)
+    select_rates = ",\n        ".join(rate_columns[name] for name in wanted)
+
+    return f"""
+    WITH stats AS (
+        {_TPR_FPR_STATS_SELECT}
+        FROM {source_eval_flat}
+        WHERE ({filter_clause})
+        GROUP BY
+            {_TPR_FPR_STATS_GROUP_BY}
+    )
+    SELECT
+        distance_bin,
+        {select_rates}
+    FROM stats
+    GROUP BY distance_bin
+    ORDER BY CAST(REPLACE(SPLIT_PART(distance_bin, ',', 1), '[', ' ') AS INTEGER)
+    """
+
 
 def build_filter_clause(filters: dict,*, enable_dist_h: bool = True) -> str:
-    """Build WHERE clause from filters."""
+    """Build WHERE clause from filters.
+
+    For label / suites / visibility: ``None`` means this dimension is inactive (e.g. no suite column).
+    An empty list ``[]`` also adds no condition for that dimension (same as all options selected).
+    A non-list scalar adds an equality condition; for label and suites the ``'__all__'`` sentinel
+    adds none. Label, suite and visibility values are single-quote escaped before being embedded.
+    """
     conditions = []
     
     if filters.get('topic_name') and filters['topic_name'] != '__all__':
         conditions.append(f"topic_name = '{filters['topic_name']}'")
     
-    if filters.get('label'):
-        if isinstance(filters['label'], list) and len(filters['label']) > 0:
-            # Escape single quotes in labels
-            labels_escaped = [str(l).replace("'", "''") for l in filters['label']]
-            labels_str = "', '".join(labels_escaped)
-            conditions.append(f"label IN ('{labels_str}')")
-        elif not isinstance(filters['label'], list) and filters['label'] != '__all__':
-            label_escaped = str(filters['label']).replace("'", "''")
+    lbl = filters.get('label')
+    if lbl is not None:
+        if isinstance(lbl, list):
+            if len(lbl) > 0:
+                labels_escaped = [str(l).replace("'", "''") for l in lbl]
+                labels_str = "', '".join(labels_escaped)
+                conditions.append(f"label IN ('{labels_str}')")
+        elif not isinstance(lbl, list) and lbl != '__all__':
+            label_escaped = str(lbl).replace("'", "''")
             conditions.append(f"label = '{label_escaped}'")
     
-    if filters.get('suites'):
-        if isinstance(filters['suites'], list) and len(filters['suites']) > 0:
-            suite_escaped = [str(s).replace("'", "''") for s in filters['suites']]
-            suite_str = "', '".join(suite_escaped)
-            conditions.append(f"COALESCE(CAST(suite_name AS VARCHAR), '') IN ('{suite_str}')")
-        elif not isinstance(filters['suites'], list) and filters['suites'] != '__all__':
-            s_escaped = str(filters['suites']).replace("'", "''")
+    su = filters.get('suites')
+    if su is not None:
+        if isinstance(su, list):
+            if len(su) > 0:
+                suite_escaped = [str(s).replace("'", "''") for s in su]
+                suite_str = "', '".join(suite_escaped)
+                conditions.append(f"COALESCE(CAST(suite_name AS VARCHAR), '') IN ('{suite_str}')")
+        elif not isinstance(su, list) and su != '__all__':
+            s_escaped = str(su).replace("'", "''")
             conditions.append(f"COALESCE(CAST(suite_name AS VARCHAR), '') = '{s_escaped}'")
     
-    if filters.get('visibility'):
-        if isinstance(filters['visibility'], list) and len(filters['visibility']) > 0:
-            # Escape single quotes in visibility values
-            vis_escaped = [str(v).replace("'", "''") for v in filters['visibility']]
-            vis_str = "', '".join(vis_escaped)
-            conditions.append(f"COALESCE(visibility, 'not available') IN ('{vis_str}')")
-        elif not isinstance(filters['visibility'], list):
-            vis_escaped = str(filters['visibility']).replace("'", "''")
+    vis = filters.get('visibility')
+    if vis is not None:
+        if isinstance(vis, list):
+            if len(vis) > 0:
+                vis_escaped = [str(v).replace("'", "''") for v in vis]
+                vis_str = "', '".join(vis_escaped)
+                conditions.append(f"COALESCE(visibility, 'not available') IN ('{vis_str}')")
+        elif not isinstance(vis, list):
+            vis_escaped = str(vis).replace("'", "''")
             conditions.append(f"COALESCE(visibility, 'not available') = '{vis_escaped}'")
     
     if enable_dist_h and filters.get('max_eval_range'):
@@ -542,1438 +679,1741 @@ def build_filter_clause(filters: dict,*, enable_dist_h: bool = True) -> str:
             target_files.append(tf)
 
 con = get_duckdb_connection()
-for i, (path, lbl) in enumerate(zip(target_files, run_labels_list)):
-    ok, msg = validate_parquet_file(con, path)
-    if not ok:
-        st.sidebar.error(f"**Run ({lbl}) file** cannot be read: {msg}")
-        st.stop()
+fp = _parquet_selection_fingerprint(target_files)
+cache_hit = st.session_state.get("_ds_parquet_fp") == fp and "_ds_filter_opts" in st.session_state
+selected_run_paths = [Path(r["path"]) for r in runs]
+cached_target_files = list(target_files)
+cache_rebuild_notes: List[str] = []
+
+ds_dlog(
+    "duckdb setup: fp=%s cache_hit=%s n_runs=%s target_files=%s",
+    fp,
+    cache_hit,
+    len(target_files),
+    [os.path.basename(p) for p in target_files],
+)
+ds_debug_log_memory("before_duckdb_validate_views")
+
+with ds_dtimer("duckdb_validate_views_list_values_or_cache", st.session_state):
+    if not cache_hit:
+        for i, (path, lbl) in enumerate(zip(target_files, run_labels_list)):
+            ok, msg = validate_parquet_file(con, path)
+            if not ok:
+                st.sidebar.error(f"**Run ({lbl}) file** cannot be read: {msg}")
+                st.stop()
+
+        # Automatically materialize eval_flat cache parquet(s) under each run.
+        for i, (path, run_path, lbl) in enumerate(zip(target_files, selected_run_paths, run_labels_list)):
+            cached_path, rebuilt = _ensure_detection_stats_eval_flat_cache(
+                con,
+                run_path=run_path,
+                source_path=path,
+            )
+            cached_target_files[i] = cached_path
+            if rebuilt:
+                cache_rebuild_notes.append(f"Run {lbl}: refreshed detection cache from {os.path.basename(path)}")
 
-# Create one eval_flat + tpr_fpr view per run (view_eval_flat_1, view_tpr_fpr_1, ...)
-try:
-    for i, path in enumerate(target_files):
-        v_flat = "view_eval_flat" if i == 0 else f"view_eval_flat_{i}"
-        v_tpr = "view_tpr_fpr_by_class_dist_topic" if i == 0 else f"view_tpr_fpr_{i}"
-        create_view_eval_flat(con, path, v_flat)
-        create_view_tpr_fpr(con, v_tpr, source_eval_flat=v_flat)
-except Exception as e:
-    st.error(f"Error creating views: {e}")
-    st.stop()
+        # One eval_flat view per run. (TPR/FPR layered views are not created: Distance queries inline the same
+        # stats from eval_flat — nested view + aggregate can segfault DuckDB, exit 139.)
+        try:
+            for i, path in enumerate(cached_target_files):
+                v_flat = "view_eval_flat" if i == 0 else f"view_eval_flat_{i}"
+                create_view_eval_flat(con, path, v_flat)
+        except Exception as e:
+            st.error(f"Error creating views: {e}")
+            st.stop()
+
+        # Filter options from first file (applied to all runs)
+        target_file = cached_target_files[0]
+        topics = list_values(con, target_file, "topic_name")
+        labels = list_values(con, target_file, "label")
+        try:
+            suite_options = list_values(con, target_file, "COALESCE(CAST(suite_name AS VARCHAR), '')")
+        except Exception:
+            suite_options = []
+        vis_options = list_values(con, target_file, "COALESCE(CAST(visibility AS VARCHAR), 'not available') AS visibility")
+        schema = schema_flags(con, target_file)
+        st.session_state["_ds_parquet_fp"] = fp
+        st.session_state["_ds_filter_opts"] = {
+            "topics": topics,
+            "labels": labels,
+            "suite_options": suite_options,
+            "vis_options": vis_options,
+            "schema": schema,
+            "cached_target_files": list(cached_target_files),
+            "cache_rebuild_notes": list(cache_rebuild_notes),
+        }
+    else:
+        opts = st.session_state["_ds_filter_opts"]
+        topics = opts["topics"]
+        labels = opts["labels"]
+        suite_options = opts["suite_options"]
+        vis_options = opts["vis_options"]
+        schema = opts["schema"]
+        target_file = target_files[0]
+        cached_target_files = opts.get("cached_target_files", list(target_files))
+        cache_rebuild_notes = opts.get("cache_rebuild_notes", [])
+        for i, path in enumerate(cached_target_files):
+            v_flat = "view_eval_flat" if i == 0 else f"view_eval_flat_{i}"
+            create_view_eval_flat(con, path, v_flat)
+
+ds_debug_log_memory("after_duckdb_validate_views")
 
-# Filter options from first file (applied to all runs)
-target_file = target_files[0]
 with st.sidebar:
-    topics = list_values(con, target_file, "topic_name")
     topic_name = st.selectbox("Topic Name", ["__all__"] + topics, key="topic_name") if topics else "__all__"
-    labels = list_values(con, target_file, "label")
-    selected_labels = st.multiselect("Label(s)", labels, default=labels[:5] if labels and len(labels) > 5 else (labels or []), key="labels")
-    try:
-        suite_options = list_values(con, target_file, "COALESCE(CAST(suite_name AS VARCHAR), '')")
-    except Exception:
-        suite_options = []
-    selected_suites = st.multiselect("Suites", suite_options, default=suite_options, key="suites", help="Filter by suite(s). Default: all included.") if suite_options else []
-    vis_options = list_values(con, target_file, "COALESCE(CAST(visibility AS VARCHAR), 'not available') AS visibility")
-    selected_visibility = st.multiselect("Visibility", vis_options, default=vis_options, key="visibility") if vis_options else []
+    # Widget keys: avoid generic "labels"/"visibility" (session_state collisions, ambiguous with run_labels).
+    if "ds_filter_class_labels" not in st.session_state and "labels" in st.session_state:
+        st.session_state["ds_filter_class_labels"] = st.session_state["labels"]
+    if "ds_filter_visibility" not in st.session_state and "visibility" in st.session_state:
+        st.session_state["ds_filter_visibility"] = st.session_state["visibility"]
+    if labels:
+        if "ds_filter_class_labels" not in st.session_state:
+            st.session_state["ds_filter_class_labels"] = list(labels)
+        selected_labels = st.multiselect(
+            "Label(s)",
+            labels,
+            key="ds_filter_class_labels",
+        )
+    else:
+        selected_labels = []
+    if suite_options:
+        if "suites" not in st.session_state:
+            st.session_state["suites"] = list(suite_options)
+        selected_suites = st.multiselect(
+            "Suites",
+            suite_options,
+            key="suites",
+            help="Filter by suite(s). Default: all included.",
+        )
+    else:
+        selected_suites = []
+    if vis_options:
+        if "ds_filter_visibility" not in st.session_state:
+            st.session_state["ds_filter_visibility"] = list(vis_options)
+        selected_visibility = st.multiselect(
+            "Visibility",
+            vis_options,
+            key="ds_filter_visibility",
+        )
+    else:
+        selected_visibility = []
     max_eval_range = st.selectbox("Max Evaluation Range [m]", [50, 80, 100, 120, 150], index=0, key="max_eval_range")
 
-# Build filters (same values for all runs)
+# Build filters (same values for all runs). None = dimension unused (no suite/visibility column in UI).
 filters_base = {
     'topic_name': topic_name,
     'label': selected_labels,
-    'suites': selected_suites,
-    'visibility': selected_visibility,
+    'suites': selected_suites if suite_options else None,
+    'visibility': selected_visibility if vis_options else None,
     'max_eval_range': max_eval_range
 }
 filters_list = [filters_base] * len(runs)
 
-# Schema flags for optional columns (confidence, velocity, etc.)
-schema = schema_flags(con, target_file)
+try:
+    _fcl_preview = build_filter_clause(filters_base)
+except Exception as _e_fcl:
+    _fcl_preview = f"<build_filter_clause error: {_e_fcl}>"
+ds_dlog("filters_base keys=%s filter_clause_preview=%s", list(filters_base.keys()), _fcl_preview[:800])
 
-# Banner while the rest of the page (queries + charts) streams in — cleared at end of script.
+# Banner while the rest of the page (queries + charts) streams in — cleared in finally (even on errors).
 _ds_loading_banner = st.empty()
-_ds_loading_banner.markdown(detection_stats_page_loading_banner_markup(), unsafe_allow_html=True)
-
-# =============================
-# Main Content
-# =============================
-
-# -----------------------------
-# KPI strip (TP, FP, FN, TPR, FPR, Precision, Recall, F1)
-# -----------------------------
-def _flat_view(i: int) -> str:
-    return "view_eval_flat" if i == 0 else f"view_eval_flat_{i}"
-
-def _kpi_row_for_view(con, view: str, filter_clause: str):
-    """Return dict with tp_gt, fn, tp_est, fp and derived TPR, FPR, Precision, Recall, F1."""
-    q = f"""
-    SELECT
-        COUNT(*) FILTER (WHERE source = 'GT' AND status = 'TP') AS tp_gt,
-        COUNT(*) FILTER (WHERE source = 'GT' AND status = 'FN') AS fn,
-        COUNT(*) FILTER (WHERE source = 'EST' AND status = 'TP') AS tp_est,
-        COUNT(*) FILTER (WHERE source = 'EST' AND status = 'FP') AS fp
-    FROM {view}
-    WHERE {filter_clause}
-    """
-    row = con.execute(q).fetchone()
-    if not row:
-        return None
-    tp_gt, fn, tp_est, fp = int(row[0]), int(row[1]), int(row[2]), int(row[3])
-    gt_total = tp_gt + fn
-    est_total = tp_est + fp
-    tpr = (tp_gt / gt_total) if gt_total > 0 else None
-    fpr = (fp / est_total) if est_total > 0 else None
-    precision = (tp_est / est_total) if est_total > 0 else None
-    recall = tpr
-    if precision is not None and recall is not None and (precision + recall) > 0:
-        f1 = 2 * precision * recall / (precision + recall)
-    else:
-        f1 = None
-    return {
-        "tp": tp_gt, "fp": fp, "fn": fn,
-        "tpr": tpr, "fpr": fpr, "precision": precision, "recall": recall, "f1": f1,
-    }
+_cache_note = " ".join(cache_rebuild_notes)
+_ds_loading_banner.markdown(_banner_html_with_note(_cache_note), unsafe_allow_html=True)
+try:
+    ds_dlog("main_content_try_enter")
+    ds_debug_log_memory("main_content_start")
 
-# =============================
-# Panel 1: t4dataset Summary
-# =============================
-st.markdown(section_header_html("Summary", "Within selected filters and max evaluation range."), unsafe_allow_html=True)
-if single_mode:
-    with ds_spot_loading("Summary · KPI metrics"):
-        fc = build_filter_clause(filters_base)
-        kpi = _kpi_row_for_view(con, "view_eval_flat", fc)
-    inject_detection_stats_kpi_styles()
-    if kpi:
-        html = '<div class="kpi-wrap">' + render_kpi_card("Metrics (within filters & max range)", kpi) + "</div>"
-        st.markdown(html, unsafe_allow_html=True)
+    # =============================
+    # Main Content
+    # =============================
+    
+    # -----------------------------
+    # KPI strip (TP, FP, FN, TPR, FPR, Precision, Recall, F1)
+    # -----------------------------
+    def _flat_view(i: int) -> str:
+        return "view_eval_flat" if i == 0 else f"view_eval_flat_{i}"
+    
+    def _kpi_row_for_view(con, view: str, filter_clause: str):
+        """Return dict with tp_gt, fn, tp_est, fp and derived TPR, FPR, Precision, Recall, F1."""
+        q = f"""
+        SELECT
+            COUNT(*) FILTER (WHERE source = 'GT' AND status = 'TP') AS tp_gt,
+            COUNT(*) FILTER (WHERE source = 'GT' AND status = 'FN') AS fn,
+            COUNT(*) FILTER (WHERE source = 'EST' AND status = 'TP') AS tp_est,
+            COUNT(*) FILTER (WHERE source = 'EST' AND status = 'FP') AS fp
+        FROM {view}
+        WHERE {filter_clause}
+        """
+        row = con.execute(q).fetchone()
+        if not row:
+            return None
+        tp_gt, fn, tp_est, fp = int(row[0]), int(row[1]), int(row[2]), int(row[3])
+        gt_total = tp_gt + fn
+        est_total = tp_est + fp
+        tpr = (tp_gt / gt_total) if gt_total > 0 else None
+        fpr = (fp / est_total) if est_total > 0 else None
+        precision = (tp_est / est_total) if est_total > 0 else None
+        recall = tpr
+        if precision is not None and recall is not None and (precision + recall) > 0:
+            f1 = 2 * precision * recall / (precision + recall)
+        else:
+            f1 = None
+        return {
+            "tp": tp_gt, "fp": fp, "fn": fn,
+            "tpr": tpr, "fpr": fpr, "precision": precision, "recall": recall, "f1": f1,
+        }
+    
+    # =============================
+    # Panel 1: Summary KPI cards (this branch: single run, one card from view_eval_flat)
+    # =============================
+    ds_dlog("section: Panel1_Summary_start")
+    st.markdown(section_header_html("Summary", "Within selected filters and max evaluation range."), unsafe_allow_html=True)
+    if single_mode:
+        with ds_spot_loading("Summary · KPI metrics"):  # only the query runs under the loading indicator
+            fc = build_filter_clause(filters_base)
+            kpi = _kpi_row_for_view(con, "view_eval_flat", fc)
+        inject_detection_stats_kpi_styles()
+        if kpi:  # _kpi_row_for_view returns None when the query produced no row
+            html = '<div class="kpi-wrap">' + render_kpi_card("Metrics (within filters & max range)", kpi) + "</div>"
+            st.markdown(html, unsafe_allow_html=True)
+        else:
+            st.caption("No KPI data.")
     else:
-        st.caption("No KPI data.")
-else:
-    with ds_spot_loading("Summary · KPI metrics"):
-        kpis = []
-        for i in range(len(runs)):
-            fc = build_filter_clause(filters_list[i])
-            kpi = _kpi_row_for_view(con, _flat_view(i), fc)
-            kpis.append((run_labels_list[i], kpi))
-    inject_detection_stats_kpi_styles()
-    baseline = kpis[0][1] if kpis else None
-    cards_html_parts = []
-    for lbl, kpi in kpis:
-        deltas = None
-        if baseline and kpi and lbl != run_labels_list[0]:
-            deltas = {
-                "tp": kpi["tp"] - baseline["tp"],
-                "fp": kpi["fp"] - baseline["fp"],
-                "fn": kpi["fn"] - baseline["fn"],
-                "tpr": (kpi["tpr"] - baseline["tpr"]) if (kpi.get("tpr") is not None and baseline.get("tpr") is not None) else None,
-                "fpr": (kpi["fpr"] - baseline["fpr"]) if (kpi.get("fpr") is not None and baseline.get("fpr") is not None) else None,
-                "precision": (kpi["precision"] - baseline["precision"]) if (kpi.get("precision") is not None and baseline.get("precision") is not None) else None,
-                "recall": (kpi["recall"] - baseline["recall"]) if (kpi.get("recall") is not None and baseline.get("recall") is not None) else None,
-                "f1": (kpi["f1"] - baseline["f1"]) if (kpi.get("f1") is not None and baseline.get("f1") is not None) else None,
-            }
-        cards_html_parts.append(render_kpi_card(f"Run {lbl}", kpi or {}, f"kpi-run-{lbl}", deltas=deltas))
-    st.markdown('<div class="kpi-wrap">' + "".join(cards_html_parts) + "</div>", unsafe_allow_html=True)
-
-if st.checkbox("Debug: Inspect Parquet (All Runs)" if not single_mode else "Debug: Inspect Parquet"):
-    cols_used = st.columns(len(target_files))
-    file_labels = [(f"Run ({run_labels_list[i]}) File", target_files[i]) for i in range(len(target_files))]
-    schema_results = []
-    for col, (label, file_path) in zip(cols_used, file_labels):
-        with col:
-            st.markdown(f"### {label}")
-            # Schema
-            schema_df = con.execute("""
-                DESCRIBE SELECT * FROM read_parquet(?)
-            """, [file_path]).df()
-            schema_results.append((label, schema_df))
-            st.write("**Schema (Column Names, Types)**")
-            st.markdown("Shows the schema (column names and their DuckDB/Parquet data types) of the selected Parquet file. Useful to check data structure and types as interpreted by DuckDB.")
-            st.dataframe(schema_df, width='stretch', hide_index=True)
-
-            # Preview rows
-            row_options = [10, 20, 50, 100, 200, "All"]
-            preview_key = f"preview_row_limit_{label.replace(' ', '_').lower()}"
-            row_choice = st.selectbox(f"Preview rows to show ({label})", row_options, index=1, key=preview_key)
-            if row_choice == "All":
-                limit_clause = ""
-            else:
-                limit_clause = f"LIMIT {row_choice}"
-            preview_df = con.execute(f"""
-                SELECT *
-                FROM read_parquet(?)
-                {limit_clause}
-            """, [file_path]).df()
-            st.write(f"**Preview (First {row_choice} rows)**")
-            st.markdown(f"Shows the first {row_choice} preview rows from the Parquet file. Use this preview to examine example data contents and check that your file is as expected.")
-            st.dataframe(preview_df, width='stretch', hide_index=True)
-
-            # Stats
-            stats_df = con.execute("""
-                SELECT
-                    COUNT(*) AS total_rows,
-                    COUNT(t4dataset_id) AS non_null_ids,
-                    COUNT(DISTINCT t4dataset_id) AS distinct_ids
-                FROM read_parquet(?)
-            """, [file_path]).df()
-            st.write("**Stats (Row Count, t4dataset_id non-null count, Distinct t4dataset_id count)**")
-            st.markdown("""
-            - `total_rows`: Total rows in the file  
-            - `non_null_ids`: Rows where t4dataset_id is not null  
-            - `distinct_ids`: Unique t4dataset_id values
-
-            This helps rapidly assess the completeness and distribution of the key ID field.
-            """)
-            st.dataframe(stats_df, width='stretch', hide_index=True)
-
-    # --- Show info about schema differences (compare mode only) ---
-    if not single_mode and len(schema_results) >= 2:
-        with st.expander("⚖️ Difference between schemas", expanded=(len(schema_results) == 2)):
-            if len(schema_results) == 2:
-                label1, df1 = schema_results[0]
-                label2, df2 = schema_results[1]
-                names1 = set(df1["column_name"])
-                names2 = set(df2["column_name"])
-                added, removed = names2 - names1, names1 - names2
-                common = names1 & names2
-                types1 = {row["column_name"]: row["column_type"] for _, row in df1.iterrows()}
-                types2 = {row["column_name"]: row["column_type"] for _, row in df2.iterrows()}
-                dtype_changes = [(c, types1.get(c), types2.get(c)) for c in sorted(common) if types1.get(c) != types2.get(c)]
-                if not (added or removed or dtype_changes):
-                    st.success("✅ The schemas are identical (column names and types match exactly).")
+        with ds_spot_loading("Summary · KPI metrics"):  # compare mode: one KPI row per run, each with its own filters and flat view
+            kpis = []  # (run label, KPI dict or None) in run order
+            for i in range(len(runs)):
+                fc = build_filter_clause(filters_list[i])
+                kpi = _kpi_row_for_view(con, _flat_view(i), fc)
+                kpis.append((run_labels_list[i], kpi))
+        inject_detection_stats_kpi_styles()
+        baseline = kpis[0][1] if kpis else None  # the first run is the reference for all deltas
+        cards_html_parts = []
+        for lbl, kpi in kpis:
+            deltas = None
+            if baseline and kpi and lbl != run_labels_list[0]:  # baseline is identified by label, so any run sharing the first label also gets no deltas
+                deltas = {  # run minus baseline; a ratio delta is None when either side is None
+                    "tp": kpi["tp"] - baseline["tp"],
+                    "fp": kpi["fp"] - baseline["fp"],
+                    "fn": kpi["fn"] - baseline["fn"],
+                    "tpr": (kpi["tpr"] - baseline["tpr"]) if (kpi.get("tpr") is not None and baseline.get("tpr") is not None) else None,
+                    "fpr": (kpi["fpr"] - baseline["fpr"]) if (kpi.get("fpr") is not None and baseline.get("fpr") is not None) else None,
+                    "precision": (kpi["precision"] - baseline["precision"]) if (kpi.get("precision") is not None and baseline.get("precision") is not None) else None,
+                    "recall": (kpi["recall"] - baseline["recall"]) if (kpi.get("recall") is not None and baseline.get("recall") is not None) else None,
+                    "f1": (kpi["f1"] - baseline["f1"]) if (kpi.get("f1") is not None and baseline.get("f1") is not None) else None,
+                }
+            cards_html_parts.append(render_kpi_card(f"Run {lbl}", kpi or {}, f"kpi-run-{lbl}", deltas=deltas))  # a missing KPI is rendered from an empty dict
+        st.markdown('<div class="kpi-wrap">' + "".join(cards_html_parts) + "</div>", unsafe_allow_html=True)
+    
+    if st.checkbox("Debug: Inspect Parquet (All Runs)" if not single_mode else "Debug: Inspect Parquet"):
+        cols_used = st.columns(len(target_files))  # one UI column per input file
+        file_labels = [(f"Run ({run_labels_list[i]}) File", target_files[i]) for i in range(len(target_files))]
+        schema_results = []  # (label, DESCRIBE result) pairs, reused by the schema comparison below
+        for col, (label, file_path) in zip(cols_used, file_labels):
+            with col:
+                st.markdown(f"### {label}")
+                # Schema: column names and types as reported by DuckDB's DESCRIBE on the file
+                schema_df = con.execute("""
+                    DESCRIBE SELECT * FROM read_parquet(?)
+                """, [file_path]).df()
+                schema_results.append((label, schema_df))
+                st.write("**Schema (Column Names, Types)**")
+                st.markdown("Shows the schema (column names and their DuckDB/Parquet data types) of the selected Parquet file. Useful to check data structure and types as interpreted by DuckDB.")
+                st.dataframe(schema_df, width='stretch', hide_index=True)
+    
+                # Preview rows: user-selectable row limit, defaulting to 20 (index=1)
+                row_options = [10, 20, 50, 100, 200, "All"]
+                preview_key = f"preview_row_limit_{label.replace(' ', '_').lower()}"  # widget key derived from the label, one selectbox per file
+                row_choice = st.selectbox(f"Preview rows to show ({label})", row_options, index=1, key=preview_key)
+                if row_choice == "All":
+                    limit_clause = ""
+                else:
+                    limit_clause = f"LIMIT {row_choice}"  # row_choice comes from the fixed row_options list above
+                preview_df = con.execute(f"""
+                    SELECT *
+                    FROM read_parquet(?)
+                    {limit_clause}
+                """, [file_path]).df()
+                st.write(f"**Preview (First {row_choice} rows)**")
+                st.markdown(f"Shows the first {row_choice} preview rows from the Parquet file. Use this preview to examine example data contents and check that your file is as expected.")
+                st.dataframe(preview_df, width='stretch', hide_index=True)
+    
+                # Stats: total row count plus non-null and distinct counts of t4dataset_id
+                stats_df = con.execute("""
+                    SELECT
+                        COUNT(*) AS total_rows,
+                        COUNT(t4dataset_id) AS non_null_ids,
+                        COUNT(DISTINCT t4dataset_id) AS distinct_ids
+                    FROM read_parquet(?)
+                """, [file_path]).df()
+                st.write("**Stats (Row Count, t4dataset_id non-null count, Distinct t4dataset_id count)**")
+                st.markdown("""
+                - `total_rows`: Total rows in the file  
+                - `non_null_ids`: Rows where t4dataset_id is not null  
+                - `distinct_ids`: Unique t4dataset_id values
+    
+                This helps rapidly assess the completeness and distribution of the key ID field.
+                """)
+                st.dataframe(stats_df, width='stretch', hide_index=True)
+    
+        # --- Show info about schema differences (compare mode only; detailed diff only for exactly two runs) ---
+        if not single_mode and len(schema_results) >= 2:
+            with st.expander("⚖️ Difference between schemas", expanded=(len(schema_results) == 2)):
+                if len(schema_results) == 2:
+                    label1, df1 = schema_results[0]
+                    label2, df2 = schema_results[1]
+                    names1 = set(df1["column_name"])
+                    names2 = set(df2["column_name"])
+                    added, removed = names2 - names1, names1 - names2  # added: only in the second file; removed: only in the first
+                    common = names1 & names2
+                    types1 = {row["column_name"]: row["column_type"] for _, row in df1.iterrows()}
+                    types2 = {row["column_name"]: row["column_type"] for _, row in df2.iterrows()}
+                    dtype_changes = [(c, types1.get(c), types2.get(c)) for c in sorted(common) if types1.get(c) != types2.get(c)]  # shared columns whose types differ
+                    if not (added or removed or dtype_changes):
+                        st.success("✅ The schemas are identical (column names and types match exactly).")
+                    else:
+                        if added:
+                            st.error(f"Columns only in `{label2}`: {', '.join(sorted(added))}")
+                        if removed:
+                            st.error(f"Columns only in `{label1}`: {', '.join(sorted(removed))}")
+                        if dtype_changes:
+                            st.warning("Columns with different types:")
+                            st.dataframe(pd.DataFrame(dtype_changes, columns=["Column", f"Type in {label1}", f"Type in {label2}"]), width='stretch', hide_index=True)
                 else:
-                    if added:
-                        st.error(f"Columns only in `{label2}`: {', '.join(sorted(added))}")
-                    if removed:
-                        st.error(f"Columns only in `{label1}`: {', '.join(sorted(removed))}")
-                    if dtype_changes:
-                        st.warning("Columns with different types:")
-                        st.dataframe(pd.DataFrame(dtype_changes, columns=["Column", f"Type in {label1}", f"Type in {label2}"]), width='stretch', hide_index=True)
+                    st.info(f"{len(schema_results)} runs loaded. Compare schemas per run in the columns above.")
+    
+    
+    
+    ds_dlog("section: Dataset_summary_status_distribution_try")
+    try:
+        with ds_spot_loading("Dataset summary & status distribution"):
+            if single_mode:
+                query_base = f"""
+                SELECT COUNT(DISTINCT t4dataset_id) AS id_num, '{os.path.basename(target_file)}' AS series
+                FROM view_eval_flat
+                """
+                df_summary = con.execute(query_base).df()
+                query_status = """
+                SELECT label, status, COUNT(*) AS num
+                FROM view_eval_flat
+                GROUP BY label, status
+                ORDER BY label, status
+                """
+                df_status = con.execute(query_status).df()
             else:
-                st.info(f"{len(schema_results)} runs loaded. Compare schemas per run in the columns above.")
-
-
-
-try:
-    with ds_spot_loading("Dataset summary & status distribution"):
+                parts = [f"SELECT COUNT(DISTINCT t4dataset_id) AS id_num, '{run_labels_list[i]}' AS series FROM {_flat_view(i)}" for i in range(len(runs))]
+                query_base = " UNION ALL ".join(parts)
+                df_summary = con.execute(query_base).df()
+                parts_status = [f"SELECT '{run_labels_list[i]}' AS dataset, label, status, COUNT(*) AS num FROM {_flat_view(i)} GROUP BY label, status" for i in range(len(runs))]
+                query_status = " UNION ALL ".join(parts_status) + " ORDER BY dataset, label, status"
+                df_status = con.execute(query_status).df()
+    
         if single_mode:
-            query_base = f"""
-            SELECT COUNT(DISTINCT t4dataset_id) AS id_num, '{os.path.basename(target_file)}' AS series
-            FROM view_eval_flat
-            """
-            df_summary = con.execute(query_base).df()
-            query_status = """
-            SELECT label, status, COUNT(*) AS num
-            FROM view_eval_flat
-            GROUP BY label, status
-            ORDER BY label, status
-            """
-            df_status = con.execute(query_status).df()
-        else:
-            parts = [f"SELECT COUNT(DISTINCT t4dataset_id) AS id_num, '{run_labels_list[i]}' AS series FROM {_flat_view(i)}" for i in range(len(runs))]
-            query_base = " UNION ALL ".join(parts)
-            df_summary = con.execute(query_base).df()
-            parts_status = [f"SELECT '{run_labels_list[i]}' AS dataset, label, status, COUNT(*) AS num FROM {_flat_view(i)} GROUP BY label, status" for i in range(len(runs))]
-            query_status = " UNION ALL ".join(parts_status) + " ORDER BY dataset, label, status"
-            df_status = con.execute(query_status).df()
-
-    if single_mode:
-        if not df_status.empty:
-            if st.checkbox("Debug: Inspect Status Count (All Runs)" if not single_mode else "Debug: Inspect Status Count"):
-                df_status_wide = df_status.pivot_table(index='label', columns='status', values='num', fill_value=0).reset_index()
-                st.download_button("Download status count (CSV)", data=df_status_wide.to_csv(index=False).encode("utf-8"), file_name="detection_status_count.csv", mime="text/csv", key="dl_status_count")
-                st.dataframe(df_status_wide, width='stretch', hide_index=True)
-            status_viz = st.radio(
-                "Status chart style",
-                options=["Stacked bar (counts)", "Treemap", "100% stacked (proportions)", "Spider chart (TP, FP & FN)"],
-                index=0,
-                horizontal=True,
-                key="status_dist_viz",
-            )
-            n_labels = df_status["label"].nunique()
-            use_horizontal = n_labels > 6
-            if status_viz == "Stacked bar (counts)":
-                if use_horizontal:
-                    fig2 = px.bar(
-                        df_status,
-                        y="label",
-                        x="num",
-                        color="status",
-                        barmode="stack",
-                        title="Status Distribution per Label",
-                        labels={"num": "Count", "label": "Label", "status": "Status"},
-                        color_discrete_map=STATUS_COLORS,
-                        orientation="h",
-                    )
-                else:
-                    fig2 = px.bar(
-                        df_status,
-                        x="label",
-                        y="num",
-                        color="status",
-                        barmode="stack",
-                        title="Status Distribution per Label",
-                        labels={"num": "Count", "label": "Label", "status": "Status"},
-                        color_discrete_map=STATUS_COLORS,
-                    )
-                apply_chart_theme(fig2)
-                st.plotly_chart(fig2, width='stretch')
-            elif status_viz == "Treemap":
-                fig2 = px.treemap(
-                    df_status,
-                    path=["label", "status"],
-                    values="num",
-                    color="status",
-                    color_discrete_map=STATUS_COLORS,
-                    title="Status Distribution per Label (area = count)",
-                )
-                fig2.update_traces(
-                    textinfo="label+value+percent parent",
-                    hovertemplate="%{label}<br>Count: %{value}<extra></extra>",
+            if not df_status.empty:
+                if st.checkbox("Debug: Inspect Status Count (All Runs)" if not single_mode else "Debug: Inspect Status Count"):
+                    df_status_wide = df_status.pivot_table(index='label', columns='status', values='num', fill_value=0).reset_index()
+                    st.download_button("Download status count (CSV)", data=df_status_wide.to_csv(index=False).encode("utf-8"), file_name="detection_status_count.csv", mime="text/csv", key="dl_status_count")
+                    st.dataframe(df_status_wide, width='stretch', hide_index=True)
+                status_viz = st.radio(
+                    "Status chart style",
+                    options=["Stacked bar (counts)", "Treemap", "100% stacked (proportions)", "Spider chart (TP, FP & FN)"],
+                    index=0,
+                    horizontal=True,
+                    key="status_dist_viz",
                 )
-                apply_chart_theme(fig2, height=420)
-                st.plotly_chart(fig2, width='stretch')
-            elif status_viz == "Spider chart (TP, FP & FN)":
-                wide = df_status.pivot_table(index="label", columns="status", values="num", fill_value=0)
-                cats = sorted(wide.index.astype(str).unique())
-                if len(cats) > 16:
-                    st.caption("Spider charts work best with ≤16 labels; many classes may look crowded.")
-                run_single = [os.path.basename(target_file) if target_file else "Run"]
-                rcols = st.columns(3)
-                for col_i, st_name in enumerate(["TP", "FP", "FN"]):
-                    vals = wide[st_name] if st_name in wide.columns else pd.Series(0, index=wide.index)
-                    df_m = pd.DataFrame({"label": wide.index.astype(str), "count": vals.values})
-                    df_m["run"] = run_single[0]
-                    fig_r = _count_spider_compare(
-                        df_m,
-                        cats,
-                        f"{st_name} count per label",
-                        run_single,
-                        f"{st_name} count",
-                    )
-                    with rcols[col_i]:
-                        st.plotly_chart(fig_r, width='stretch')
-            else:
-                # 100% stacked: proportion per label
-                wide = df_status.pivot_table(index="label", columns="status", values="num", fill_value=0)
-                wide_pct = wide.div(wide.sum(axis=1), axis=0)
-                df_pct = wide_pct.reset_index().melt(id_vars="label", var_name="status", value_name="pct")
-                df_pct = df_pct[df_pct["pct"] > 0]
-                if not df_pct.empty:
+                n_labels = df_status["label"].nunique()
+                use_horizontal = n_labels > 6
+                if status_viz == "Stacked bar (counts)":
                     if use_horizontal:
                         fig2 = px.bar(
-                            df_pct,
+                            df_status,
                             y="label",
-                            x="pct",
+                            x="num",
                             color="status",
                             barmode="stack",
-                            title="Status proportion per Label (100% stacked)",
-                            labels={"pct": "Proportion", "label": "Label", "status": "Status"},
+                            title="Status Distribution per Label",
+                            labels={"num": "Count", "label": "Label", "status": "Status"},
                             color_discrete_map=STATUS_COLORS,
                             orientation="h",
                         )
                     else:
                         fig2 = px.bar(
-                            df_pct,
+                            df_status,
                             x="label",
-                            y="pct",
+                            y="num",
                             color="status",
                             barmode="stack",
-                            title="Status proportion per Label (100% stacked)",
-                            labels={"pct": "Proportion", "label": "Label", "status": "Status"},
+                            title="Status Distribution per Label",
+                            labels={"num": "Count", "label": "Label", "status": "Status"},
                             color_discrete_map=STATUS_COLORS,
                         )
                     apply_chart_theme(fig2)
-                    if use_horizontal:
-                        fig2.update_layout(xaxis_tickformat=".0%", xaxis_range=[0, 1])
-                    else:
-                        fig2.update_layout(yaxis_tickformat=".0%", yaxis_range=[0, 1])
                     st.plotly_chart(fig2, width='stretch')
-                else:
-                    st.info("No data for proportions.")
-        else:
-            st.info("No status count data available")
-    else:
-        if not df_status.empty:
-            if st.checkbox("Debug: Inspect Status Count (All Runs)" if not single_mode else "Debug: Inspect Status Count"):
-                df_status_wide = df_status.pivot_table(index='label', columns=['dataset', 'status'], values='num', fill_value=0)
-                df_status_wide.columns = [f"{col[0]} {col[1]}" for col in df_status_wide.columns]
-                df_status_wide = df_status_wide.reset_index()
-                st.dataframe(df_status_wide, width='stretch', hide_index=True)
-            status_viz = st.radio(
-                "Status chart style",
-                options=["Stacked bar (counts)", "Treemap", "100% stacked (proportions)", "Spider chart (TP, FP & FN)"],
-                index=0,
-                horizontal=True,
-                key="status_dist_viz_compare",
-            )
-            if status_viz == "Stacked bar (counts)":
-                fig2 = px.bar(
-                    df_status,
-                    x="label",
-                    y="num",
-                    color="status",
-                    barmode="stack",
-                    facet_col="dataset",
-                    title="Status Distribution per Label (by Run)",
-                    category_orders={"dataset": run_labels_list},
-                    labels={"num": "Count", "label": "Label", "status": "Status"},
-                    color_discrete_map=STATUS_COLORS,
-                )
-                apply_chart_theme(fig2)
-                st.plotly_chart(fig2, width='stretch')
-            elif status_viz == "Spider chart (TP, FP & FN)":
-                # Same counts as stacked bar: one spider per status (TP / FP / FN), axes = labels, r = count
-                status_wide = df_status.pivot_table(
-                    index=["dataset", "label"], columns="status", values="num", fill_value=0
-                ).reset_index()
-                cats = sorted(df_status["label"].astype(str).unique())
-                if len(cats) > 16:
-                    st.caption("Spider charts work best with ≤16 labels; many classes may look crowded.")
-                rcols = st.columns(3)
-                for col_i, st_name in enumerate(["TP", "FP", "FN"]):
-                    col_data = (
-                        status_wide[st_name]
-                        if st_name in status_wide.columns
-                        else pd.Series(0, index=status_wide.index)
-                    )
-                    df_m = pd.DataFrame(
-                        {
-                            "run": status_wide["dataset"].astype(str),
-                            "label": status_wide["label"].astype(str),
-                            "count": col_data.values,
-                        }
+                elif status_viz == "Treemap":
+                    fig2 = px.treemap(
+                        df_status,
+                        path=["label", "status"],
+                        values="num",
+                        color="status",
+                        color_discrete_map=STATUS_COLORS,
+                        title="Status Distribution per Label (area = count)",
                     )
-                    fig_r = _count_spider_compare(
-                        df_m,
-                        cats,
-                        f"{st_name} count per label (by run)",
-                        run_labels_list,
-                        f"{st_name} count",
+                    fig2.update_traces(
+                        textinfo="label+value+percent parent",
+                        hovertemplate="%{label}<br>Count: %{value}<extra></extra>",
                     )
-                    with rcols[col_i]:
-                        st.plotly_chart(fig_r, width='stretch')
-            elif status_viz == "Treemap":
-                n_runs = len(run_labels_list)
-                cols = st.columns(min(n_runs, 3))
-                for idx, lbl in enumerate(run_labels_list):
-                    df_r = df_status[df_status["dataset"] == lbl]
-                    if not df_r.empty:
-                        fig_t = px.treemap(
-                            df_r,
-                            path=["label", "status"],
-                            values="num",
-                            color="status",
-                            color_discrete_map=STATUS_COLORS,
-                            title=f"{lbl}",
-                        )
-                        fig_t.update_traces(
-                            textinfo="label+value+percent parent",
-                            hovertemplate="%{label}<br>Count: %{value}<extra></extra>",
+                    apply_chart_theme(fig2, height=420)
+                    st.plotly_chart(fig2, width='stretch')
+                elif status_viz == "Spider chart (TP, FP & FN)":
+                    wide = df_status.pivot_table(index="label", columns="status", values="num", fill_value=0)
+                    cats = sorted(wide.index.astype(str).unique())
+                    if len(cats) > 16:
+                        st.caption("Spider charts work best with ≤16 labels; many classes may look crowded.")
+                    run_single = [os.path.basename(target_file) if target_file else "Run"]
+                    rcols = st.columns(3)
+                    for col_i, st_name in enumerate(["TP", "FP", "FN"]):
+                        vals = wide[st_name] if st_name in wide.columns else pd.Series(0, index=wide.index)
+                        df_m = pd.DataFrame({"label": wide.index.astype(str), "count": vals.values})
+                        df_m["run"] = run_single[0]
+                        fig_r = _count_spider_compare(
+                            df_m,
+                            cats,
+                            f"{st_name} count per label",
+                            run_single,
+                            f"{st_name} count",
                         )
-                        apply_chart_theme(fig_t, height=360)
-                        with cols[idx % len(cols)]:
-                            st.plotly_chart(fig_t, width='stretch')
-            else:
-                # 100% stacked per run (facet)
-                df_pct_list = []
-                for lbl in run_labels_list:
-                    df_r = df_status[df_status["dataset"] == lbl]
-                    wide = df_r.pivot_table(index="label", columns="status", values="num", fill_value=0)
-                    if wide.empty:
-                        continue
+                        with rcols[col_i]:
+                            st.plotly_chart(fig_r, width='stretch')
+                else:
+                    # 100% stacked: proportion per label
+                    wide = df_status.pivot_table(index="label", columns="status", values="num", fill_value=0)
                     wide_pct = wide.div(wide.sum(axis=1), axis=0)
-                    wide_pct["dataset"] = lbl
-                    wide_pct = wide_pct.reset_index()
-                    df_pct_list.append(wide_pct)
-                if df_pct_list:
-                    wide_all = pd.concat(df_pct_list, ignore_index=True)
-                    df_pct_melt = wide_all.melt(
-                        id_vars=["label", "dataset"],
-                        value_vars=[c for c in wide_all.columns if c not in ("label", "dataset")],
-                        var_name="status",
-                        value_name="pct",
-                    )
-                    df_pct_melt = df_pct_melt[df_pct_melt["pct"] > 0]
-                    if not df_pct_melt.empty:
-                        fig2 = px.bar(
-                            df_pct_melt,
-                            x="label",
-                            y="pct",
-                            color="status",
-                            barmode="stack",
-                            facet_col="dataset",
-                            category_orders={"dataset": run_labels_list},
-                            title="Status proportion per Label (100% stacked, by Run)",
-                            labels={"pct": "Proportion", "label": "Label", "status": "Status"},
-                            color_discrete_map=STATUS_COLORS,
-                        )
+                    df_pct = wide_pct.reset_index().melt(id_vars="label", var_name="status", value_name="pct")
+                    df_pct = df_pct[df_pct["pct"] > 0]
+                    if not df_pct.empty:
+                        if use_horizontal:
+                            fig2 = px.bar(
+                                df_pct,
+                                y="label",
+                                x="pct",
+                                color="status",
+                                barmode="stack",
+                                title="Status proportion per Label (100% stacked)",
+                                labels={"pct": "Proportion", "label": "Label", "status": "Status"},
+                                color_discrete_map=STATUS_COLORS,
+                                orientation="h",
+                            )
+                        else:
+                            fig2 = px.bar(
+                                df_pct,
+                                x="label",
+                                y="pct",
+                                color="status",
+                                barmode="stack",
+                                title="Status proportion per Label (100% stacked)",
+                                labels={"pct": "Proportion", "label": "Label", "status": "Status"},
+                                color_discrete_map=STATUS_COLORS,
+                            )
                         apply_chart_theme(fig2)
-                        fig2.update_layout(
-                            yaxis_tickformat=".0%",
-                            yaxis_range=[0, 1],
-                        )
-                        for ann in fig2.layout.annotations:
-                            ann.text = ann.text.split("=")[-1]
+                        if use_horizontal:
+                            fig2.update_layout(xaxis_tickformat=".0%", xaxis_range=[0, 1])
+                        else:
+                            fig2.update_layout(yaxis_tickformat=".0%", yaxis_range=[0, 1])
                         st.plotly_chart(fig2, width='stretch')
                     else:
                         st.info("No data for proportions.")
-                else:
-                    st.info("No data for proportions.")
-        else:
-            st.info("No status count data available")
-
-except Exception as e:
-    st.error(f"Error in summary: {e}")
-
-
-
-def _tpr_fpr_view(i: int) -> str:
-    return "view_tpr_fpr_by_class_dist_topic" if i == 0 else f"view_tpr_fpr_{i}"
-
-
-def _distance_bin_order_and_label(bin_str: str) -> Tuple[int, str]:
-    """Parse distance_bin e.g. '[0,10)' -> (0, '0–10 m'). Used for sorting and axis labels."""
-    import re
-    s = str(bin_str).strip()
-    m = re.match(r"\[(\d+)\s*,\s*(\d+)\)", s)
-    if m:
-        lo, hi = int(m.group(1)), int(m.group(2))
-        return (lo, f"{lo}–{hi} m")
-    m = re.match(r"\[(\d+)\s*,\s*inf\)", s, re.I)
-    if m:
-        return (int(m.group(1)), f"{m.group(1)}+ m")
-    return (0, s)
-
-
-# Same 10 m bins as view_tpr_fpr / eval_flat (used for object-count alignment)
-_DIST_BIN_CASE = """CASE
-  WHEN dist_h >= 0 AND dist_h < 10 THEN '[0,10)'
-  WHEN dist_h >= 10 AND dist_h < 20 THEN '[10,20)'
-  WHEN dist_h >= 20 AND dist_h < 30 THEN '[20,30)'
-  WHEN dist_h >= 30 AND dist_h < 40 THEN '[30,40)'
-  WHEN dist_h >= 40 AND dist_h < 50 THEN '[40,50)'
-  WHEN dist_h >= 50 AND dist_h < 60 THEN '[50,60)'
-  WHEN dist_h >= 60 AND dist_h < 70 THEN '[60,70)'
-  WHEN dist_h >= 70 AND dist_h < 80 THEN '[70,80)'
-  WHEN dist_h >= 80 AND dist_h < 90 THEN '[80,90)'
-  WHEN dist_h >= 90 AND dist_h < 100 THEN '[90,100)'
-  WHEN dist_h >= 100 AND dist_h < 110 THEN '[100,110)'
-  WHEN dist_h >= 110 AND dist_h < 120 THEN '[110,120)'
-  WHEN dist_h >= 120 AND dist_h < 130 THEN '[120,130)'
-  WHEN dist_h >= 130 AND dist_h < 140 THEN '[130,140)'
-  WHEN dist_h >= 140 AND dist_h < 150 THEN '[140,150)'
-  WHEN dist_h >= 150 THEN '[150,inf)'
-  ELSE '[unknown]' END"""
-
-
-# =============================
-# Panel 3–5: Distance — TP/FP rates by bin + object count vs range
-# =============================
-st.divider()
-st.markdown(
-    section_header_html(
-        "Distance: TP/FP rates & object count",
-        "Same distance bins and chart style (line or bar) for rates and object counts; x-axis order matches across charts.",
-    ),
-    unsafe_allow_html=True,
-)
-rate_by_dist_style = st.radio(
-    "Chart style",
-    options=["Line chart (trend)", "Bar chart (histogram)"],
-    index=0,
-    horizontal=True,
-    key="tp_fp_rate_by_dist_style",
-)
-
-filter_clause_base = build_filter_clause(filters_base, enable_dist_h=False)
-_dist_slot = st.empty()
-_dist_slot.markdown(ds_spot_loading_markup("Distance · TP/FP rates & object counts"), unsafe_allow_html=True)
-try:
-    use_line_chart = rate_by_dist_style == "Line chart (trend)"
-    rate_bin_labels_order: Optional[List[str]] = None
-
-    if single_mode:
-        # Fetch both TP and FP rate by distance
-        query_both = f"""
-        SELECT
-            distance_bin,
-            CASE WHEN SUM(gt_total) > 0 THEN CAST(SUM(tp_gt) AS DOUBLE) / SUM(gt_total) ELSE 0 END AS tpr,
-            CASE WHEN SUM(est_total) > 0 THEN CAST(SUM(fp_est) AS DOUBLE) / SUM(est_total) ELSE 0 END AS fpr
-        FROM view_tpr_fpr_by_class_dist_topic
-        WHERE {filter_clause_base}
-        GROUP BY distance_bin
-        ORDER BY CAST(REPLACE(SPLIT_PART(distance_bin, ',', 1), '[', ' ') AS INTEGER)
-        """
-        df_both = con.execute(query_both).df()
-        if not df_both.empty:
-            df_both["bin_order"], df_both["bin_label"] = zip(
-                *df_both["distance_bin"].map(_distance_bin_order_and_label)
-            )
-            df_both = df_both.sort_values("bin_order")
-            x_labels = df_both["bin_label"].tolist()
-            rate_bin_labels_order = x_labels
-
-            if use_line_chart:
-                fig = go.Figure()
-                fig.add_trace(
-                    go.Scatter(
-                        x=x_labels,
-                        y=df_both["tpr"],
-                        name="TP rate",
-                        mode="lines",
-                        line=dict(color=RUN_COLORS[0], width=2.5, shape="spline"),
-                        fill="tozeroy",
-                        fillcolor="rgba(74, 144, 217, 0.2)",
-                        hovertemplate="%{x}<br>TP rate: %{y:.2%}<extra></extra>",
-                    )
-                )
-                fig.add_trace(
-                    go.Scatter(
-                        x=x_labels,
-                        y=df_both["fpr"],
-                        name="FP rate",
-                        mode="lines",
-                        line=dict(color=RUN_COLORS[1], width=2.5, shape="spline"),
-                        fill="tozeroy",
-                        fillcolor="rgba(232, 106, 51, 0.2)",
-                        hovertemplate="%{x}<br>FP rate: %{y:.2%}<extra></extra>",
-                    )
-                )
-                apply_chart_theme(fig, height=420)
-                fig.update_layout(
-                    title=f"TP & FP rate by distance (within {max_eval_range} m)",
-                    xaxis_title="Distance bin",
-                    yaxis_title="Rate",
-                    yaxis_range=[0, 1],
-                    xaxis=dict(
-                        tickangle=-35,
-                        categoryorder="array",
-                        categoryarray=x_labels,
-                    ),
-                    hovermode="x unified",
-                )
-                fig.add_hline(y=0.5, line_dash="dash", line_color="rgba(0,0,0,0.25)")
-                st.plotly_chart(fig, width='stretch')
             else:
-                # Bar chart (histogram): combined TP + FP grouped bars
-                fig = go.Figure()
-                fig.add_trace(
-                    go.Bar(
-                        x=x_labels,
-                        y=df_both["tpr"],
-                        name="TP rate",
-                        marker_color=RUN_COLORS[0],
-                        hovertemplate="%{x}<br>TP rate: %{y:.2%}<extra></extra>",
-                    )
+                st.info("No status count data available")
+        else:
+            if not df_status.empty:
+                if st.checkbox("Debug: Inspect Status Count (All Runs)" if not single_mode else "Debug: Inspect Status Count"):
+                    df_status_wide = df_status.pivot_table(index='label', columns=['dataset', 'status'], values='num', fill_value=0)
+                    df_status_wide.columns = [f"{col[0]} {col[1]}" for col in df_status_wide.columns]
+                    df_status_wide = df_status_wide.reset_index()
+                    st.dataframe(df_status_wide, width='stretch', hide_index=True)
+                status_viz = st.radio(
+                    "Status chart style",
+                    options=["Stacked bar (counts)", "Treemap", "100% stacked (proportions)", "Spider chart (TP, FP & FN)"],
+                    index=0,
+                    horizontal=True,
+                    key="status_dist_viz_compare",
                 )
-                fig.add_trace(
-                    go.Bar(
-                        x=x_labels,
-                        y=df_both["fpr"],
-                        name="FP rate",
-                        marker_color=RUN_COLORS[1],
-                        hovertemplate="%{x}<br>FP rate: %{y:.2%}<extra></extra>",
+                if status_viz == "Stacked bar (counts)":
+                    fig2 = px.bar(
+                        df_status,
+                        x="label",
+                        y="num",
+                        color="status",
+                        barmode="stack",
+                        facet_col="dataset",
+                        title="Status Distribution per Label (by Run)",
+                        category_orders={"dataset": run_labels_list},
+                        labels={"num": "Count", "label": "Label", "status": "Status"},
+                        color_discrete_map=STATUS_COLORS,
                     )
-                )
-                apply_chart_theme(fig, height=420)
-                fig.update_layout(
-                    title=f"TP & FP rate by distance (within {max_eval_range} m)",
-                    xaxis_title="Distance bin",
-                    yaxis_title="Rate",
-                    yaxis_range=[0, 1],
-                    barmode="group",
-                    xaxis=dict(
-                        tickangle=-35,
-                        categoryorder="array",
-                        categoryarray=x_labels,
-                    ),
-                    hovermode="x unified",
-                )
-                fig.add_hline(y=0.5, line_dash="dash", line_color="rgba(0,0,0,0.25)")
-                st.plotly_chart(fig, width='stretch')
-        else:
-            st.info("No distance-bin data available.")
-    else:
-        # Compare mode: fetch TP and FP by distance per run
-        dfs_tpr = []
-        for i in range(len(runs)):
-            fc = build_filter_clause(filters_list[i], enable_dist_h=False)
-            q = f"""
-            SELECT distance_bin,
-                CASE WHEN SUM(gt_total) > 0 THEN CAST(SUM(tp_gt) AS DOUBLE) / SUM(gt_total) ELSE 0 END AS tpr
-            FROM {_tpr_fpr_view(i)}
-            WHERE {fc}
-            GROUP BY distance_bin
-            ORDER BY CAST(REPLACE(SPLIT_PART(distance_bin, ',', 1), '[', ' ') AS INTEGER)
-            """
-            df_i = con.execute(q).df()
-            df_i["run"] = run_labels_list[i]
-            df_i["bin_order"], df_i["bin_label"] = zip(*df_i["distance_bin"].map(_distance_bin_order_and_label))
-            df_i = df_i.sort_values("bin_order")
-            dfs_tpr.append(df_i)
-        df_tpr_dist = pd.concat(dfs_tpr, ignore_index=True)
-
-        dfs_fpr = []
-        for i in range(len(runs)):
-            fc = build_filter_clause(filters_list[i], enable_dist_h=False)
-            q = f"""
-            SELECT distance_bin,
-                CASE WHEN SUM(est_total) > 0 THEN CAST(SUM(fp_est) AS DOUBLE) / SUM(est_total) ELSE 0 END AS fpr
-            FROM {_tpr_fpr_view(i)}
-            WHERE {fc}
-            GROUP BY distance_bin
-            ORDER BY CAST(REPLACE(SPLIT_PART(distance_bin, ',', 1), '[', ' ') AS INTEGER)
-            """
-            df_i = con.execute(q).df()
-            df_i["run"] = run_labels_list[i]
-            df_i["bin_order"], df_i["bin_label"] = zip(*df_i["distance_bin"].map(_distance_bin_order_and_label))
-            df_i = df_i.sort_values("bin_order")
-            dfs_fpr.append(df_i)
-        df_fpr_dist = pd.concat(dfs_fpr, ignore_index=True)
-
-        if not df_tpr_dist.empty:
-            rate_bin_labels_order = (
-                df_tpr_dist[df_tpr_dist["run"] == run_labels_list[0]]
-                .sort_values("bin_order")["bin_label"]
-                .tolist()
-            )
-        _xaxis_dist_bins = (
-            dict(tickangle=-35, categoryorder="array", categoryarray=rate_bin_labels_order)
-            if rate_bin_labels_order
-            else dict(tickangle=-35)
-        )
-
-        if use_line_chart:
-            if not df_tpr_dist.empty:
-                fig_tpr = go.Figure()
-                for i, lbl in enumerate(run_labels_list):
-                    d = df_tpr_dist[df_tpr_dist["run"] == lbl].sort_values("bin_order")
-                    c = RUN_COLORS[i % len(RUN_COLORS)]
-                    r, g, b = int(c[1:3], 16), int(c[3:5], 16), int(c[5:7], 16)
-                    fig_tpr.add_trace(
-                        go.Scatter(
-                            x=d["bin_label"],
-                            y=d["tpr"],
-                            name=lbl,
-                            mode="lines",
-                            line=dict(color=c, width=2.2, shape="spline"),
-                            fill="tozeroy",
-                            fillcolor=f"rgba({r},{g},{b},0.15)",
-                            hovertemplate=f"{lbl}<br>%{{x}}<br>TP rate: %{{y:.2%}}<extra></extra>",
+                    apply_chart_theme(fig2)
+                    st.plotly_chart(fig2, width='stretch')
+                elif status_viz == "Spider chart (TP, FP & FN)":
+                    # Same counts as stacked bar: one spider per status (TP / FP / FN), axes = labels, r = count
+                    status_wide = df_status.pivot_table(
+                        index=["dataset", "label"], columns="status", values="num", fill_value=0
+                    ).reset_index()
+                    cats = sorted(df_status["label"].astype(str).unique())
+                    if len(cats) > 16:
+                        st.caption("Spider charts work best with ≤16 labels; many classes may look crowded.")
+                    rcols = st.columns(3)
+                    for col_i, st_name in enumerate(["TP", "FP", "FN"]):
+                        col_data = (
+                            status_wide[st_name]
+                            if st_name in status_wide.columns
+                            else pd.Series(0, index=status_wide.index)
                         )
-                    )
-                apply_chart_theme(fig_tpr, height=420)
-                fig_tpr.update_layout(
-                    title=f"TP rate by distance",
-                    xaxis_title="Distance bin",
-                    yaxis_title="TP rate",
-                    yaxis_range=[0, 1],
-                    xaxis=_xaxis_dist_bins,
-                    hovermode="x unified",
-                )
-                fig_tpr.add_hline(y=0.5, line_dash="dash", line_color="rgba(0,0,0,0.25)")
-                st.plotly_chart(fig_tpr, width='stretch')
-            else:
-                st.info("No TP rate by distance data.")
-
-            if not df_fpr_dist.empty:
-                fig_fpr = go.Figure()
-                for i, lbl in enumerate(run_labels_list):
-                    d = df_fpr_dist[df_fpr_dist["run"] == lbl].sort_values("bin_order")
-                    c = RUN_COLORS[i % len(RUN_COLORS)]
-                    r, g, b = int(c[1:3], 16), int(c[3:5], 16), int(c[5:7], 16)
-                    fig_fpr.add_trace(
-                        go.Scatter(
-                            x=d["bin_label"],
-                            y=d["fpr"],
-                            name=lbl,
-                            mode="lines",
-                            line=dict(color=c, width=2.2, shape="spline"),
-                            fill="tozeroy",
-                            fillcolor=f"rgba({r},{g},{b},0.15)",
-                            hovertemplate=f"{lbl}<br>%{{x}}<br>FP rate: %{{y:.2%}}<extra></extra>",
+                        df_m = pd.DataFrame(
+                            {
+                                "run": status_wide["dataset"].astype(str),
+                                "label": status_wide["label"].astype(str),
+                                "count": col_data.values,
+                            }
                         )
-                    )
-                apply_chart_theme(fig_fpr, height=420)
-                fig_fpr.update_layout(
-                    title=f"FP rate by distance",
-                    xaxis_title="Distance bin",
-                    yaxis_title="FP rate",
-                    yaxis_range=[0, 1],
-                    xaxis=_xaxis_dist_bins,
-                    hovermode="x unified",
-                )
-                fig_fpr.add_hline(y=0.5, line_dash="dash", line_color="rgba(0,0,0,0.25)")
-                st.plotly_chart(fig_fpr, width='stretch')
-            else:
-                st.info("No FP rate by distance data.")
-        else:
-            # Bar chart (histogram) for compare: TP then FP, grouped by run
-            if not df_tpr_dist.empty:
-                fig_tpr = go.Figure()
-                for i, lbl in enumerate(run_labels_list):
-                    d = df_tpr_dist[df_tpr_dist["run"] == lbl].sort_values("bin_order")
-                    fig_tpr.add_trace(
+                        fig_r = _count_spider_compare(
+                            df_m,
+                            cats,
+                            f"{st_name} count per label (by run)",
+                            run_labels_list,
+                            f"{st_name} count",
+                        )
+                        with rcols[col_i]:
+                            st.plotly_chart(fig_r, width='stretch')
+                elif status_viz == "Treemap":
+                    n_runs = len(run_labels_list)
+                    cols = st.columns(min(n_runs, 3))
+                    for idx, lbl in enumerate(run_labels_list):
+                        df_r = df_status[df_status["dataset"] == lbl]
+                        if not df_r.empty:
+                            fig_t = px.treemap(
+                                df_r,
+                                path=["label", "status"],
+                                values="num",
+                                color="status",
+                                color_discrete_map=STATUS_COLORS,
+                                title=f"{lbl}",
+                            )
+                            fig_t.update_traces(
+                                textinfo="label+value+percent parent",
+                                hovertemplate="%{label}<br>Count: %{value}<extra></extra>",
+                            )
+                            apply_chart_theme(fig_t, height=360)
+                            with cols[idx % len(cols)]:
+                                st.plotly_chart(fig_t, width='stretch')
+                else:
+                    # 100% stacked per run (facet)
+                    df_pct_list = []
+                    for lbl in run_labels_list:
+                        df_r = df_status[df_status["dataset"] == lbl]
+                        wide = df_r.pivot_table(index="label", columns="status", values="num", fill_value=0)
+                        if wide.empty:
+                            continue
+                        wide_pct = wide.div(wide.sum(axis=1), axis=0)
+                        wide_pct["dataset"] = lbl
+                        wide_pct = wide_pct.reset_index()
+                        df_pct_list.append(wide_pct)
+                    if df_pct_list:
+                        wide_all = pd.concat(df_pct_list, ignore_index=True)
+                        df_pct_melt = wide_all.melt(
+                            id_vars=["label", "dataset"],
+                            value_vars=[c for c in wide_all.columns if c not in ("label", "dataset")],
+                            var_name="status",
+                            value_name="pct",
+                        )
+                        df_pct_melt = df_pct_melt[df_pct_melt["pct"] > 0]
+                        if not df_pct_melt.empty:
+                            fig2 = px.bar(
+                                df_pct_melt,
+                                x="label",
+                                y="pct",
+                                color="status",
+                                barmode="stack",
+                                facet_col="dataset",
+                                category_orders={"dataset": run_labels_list},
+                                title="Status proportion per Label (100% stacked, by Run)",
+                                labels={"pct": "Proportion", "label": "Label", "status": "Status"},
+                                color_discrete_map=STATUS_COLORS,
+                            )
+                            apply_chart_theme(fig2)
+                            fig2.update_layout(
+                                yaxis_tickformat=".0%",
+                                yaxis_range=[0, 1],
+                            )
+                            for ann in fig2.layout.annotations:
+                                ann.text = ann.text.split("=")[-1]
+                            st.plotly_chart(fig2, width='stretch')
+                        else:
+                            st.info("No data for proportions.")
+                    else:
+                        st.info("No data for proportions.")
+            else:
+                st.info("No status count data available")
+    
+    except Exception as e:
+        st.error(f"Error in summary: {e}")
+    
+    
+    
+    def _distance_bin_order_and_label(bin_str: str) -> Tuple[int, str]:
+        """Map a bin string to (sort key, axis label): '[0,10)' -> (0, '0–10 m'), '[150,inf)' -> (150, '150+ m')."""
+        import re
+        text = str(bin_str).strip()
+        closed = re.match(r"\[(\d+)\s*,\s*(\d+)\)", text)
+        if closed is not None:
+            start, stop = (int(g) for g in closed.groups())
+            return (start, f"{start}–{stop} m")
+        open_ended = re.match(r"\[(\d+)\s*,\s*inf\)", text, re.I)
+        if open_ended is None:
+            return (0, text)  # unrecognized bins keep their text and get sort key 0
+        return (int(open_ended.group(1)), f"{open_ended.group(1)}+ m")
+    
+    
+    # Same 10 m bins as eval_flat / TPR-FPR stats (used for object-count alignment)
+    _DIST_BIN_CASE = """CASE
+      WHEN dist_h >= 0 AND dist_h < 10 THEN '[0,10)'
+      WHEN dist_h >= 10 AND dist_h < 20 THEN '[10,20)'
+      WHEN dist_h >= 20 AND dist_h < 30 THEN '[20,30)'
+      WHEN dist_h >= 30 AND dist_h < 40 THEN '[30,40)'
+      WHEN dist_h >= 40 AND dist_h < 50 THEN '[40,50)'
+      WHEN dist_h >= 50 AND dist_h < 60 THEN '[50,60)'
+      WHEN dist_h >= 60 AND dist_h < 70 THEN '[60,70)'
+      WHEN dist_h >= 70 AND dist_h < 80 THEN '[70,80)'
+      WHEN dist_h >= 80 AND dist_h < 90 THEN '[80,90)'
+      WHEN dist_h >= 90 AND dist_h < 100 THEN '[90,100)'
+      WHEN dist_h >= 100 AND dist_h < 110 THEN '[100,110)'
+      WHEN dist_h >= 110 AND dist_h < 120 THEN '[110,120)'
+      WHEN dist_h >= 120 AND dist_h < 130 THEN '[120,130)'
+      WHEN dist_h >= 130 AND dist_h < 140 THEN '[130,140)'
+      WHEN dist_h >= 140 AND dist_h < 150 THEN '[140,150)'
+      WHEN dist_h >= 150 THEN '[150,inf)'
+      ELSE '[unknown]' END"""
+    
+    
+    # =============================
+    # Panel 3–5: Distance — TP/FP rates by bin + object count vs range
+    # =============================
+    ds_dlog("section: Panel3_5_Distance_start")
+    st.divider()
+    st.markdown(
+        section_header_html(
+            "Distance: TP/FP rates & object count",
+            "Same distance bins and chart style (line or bar) for rates and object counts; x-axis order matches across charts.",
+        ),
+        unsafe_allow_html=True,
+    )
+    rate_by_dist_style = st.radio(
+        "Chart style",
+        options=["Line chart (trend)", "Bar chart (histogram)"],
+        index=0,
+        horizontal=True,
+        key="tp_fp_rate_by_dist_style",
+    )
+    
+    filter_clause_base = build_filter_clause(filters_base, enable_dist_h=False)
+    ds_dlog(
+        "distance: filter_clause_base (no dist_h) len=%s preview=%s",
+        len(filter_clause_base),
+        filter_clause_base[:600],
+    )
+    _dist_slot = st.empty()
+    _dist_slot.markdown(ds_spot_loading_markup("Distance · TP/FP rates & object counts"), unsafe_allow_html=True)
+    try:
+        ds_dlog("distance_inner_try: single_mode=%s", single_mode)
+        ds_debug_log_memory("distance_inner_try_start")
+        use_line_chart = rate_by_dist_style == "Line chart (trend)"
+        rate_bin_labels_order: Optional[List[str]] = None
+    
+        if single_mode:
+            # Inline stats from view_eval_flat (avoid nested TPR/FPR view — DuckDB can SIGSEGV on that plan).
+            query_both = sql_distance_bin_rates_from_eval_flat(
+                "view_eval_flat", filter_clause_base, metrics="both"
+            )
+            ds_dlog("distance: executing query_both (single_mode TPR/FPR by bin, inlined from eval_flat)")
+            df_both = con.execute(query_both).df()
+            ds_dlog("distance: query_both done rows=%s cols=%s", len(df_both), list(df_both.columns))
+            ds_debug_log_memory("distance_after_query_both")
+            if not df_both.empty:
+                df_both["bin_order"], df_both["bin_label"] = zip(
+                    *df_both["distance_bin"].map(_distance_bin_order_and_label)
+                )
+                df_both = df_both.sort_values("bin_order")
+                x_labels = df_both["bin_label"].tolist()
+                rate_bin_labels_order = x_labels
+    
+                if use_line_chart:
+                    fig = go.Figure()
+                    fig.add_trace(
+                        go.Scatter(
+                            x=x_labels,
+                            y=df_both["tpr"],
+                            name="TP rate",
+                            mode="lines",
+                            line=dict(color=RUN_COLORS[0], width=2.5, shape="spline"),
+                            fill="tozeroy",
+                            fillcolor="rgba(74, 144, 217, 0.2)",
+                            hovertemplate="%{x}<br>TP rate: %{y:.2%}<extra></extra>",
+                        )
+                    )
+                    fig.add_trace(
+                        go.Scatter(
+                            x=x_labels,
+                            y=df_both["fpr"],
+                            name="FP rate",
+                            mode="lines",
+                            line=dict(color=RUN_COLORS[1], width=2.5, shape="spline"),
+                            fill="tozeroy",
+                            fillcolor="rgba(232, 106, 51, 0.2)",
+                            hovertemplate="%{x}<br>FP rate: %{y:.2%}<extra></extra>",
+                        )
+                    )
+                    apply_chart_theme(fig, height=420)
+                    fig.update_layout(
+                        title=f"TP & FP rate by distance (within {max_eval_range} m)",
+                        xaxis_title="Distance bin",
+                        yaxis_title="Rate",
+                        yaxis_range=[0, 1],
+                        xaxis=dict(
+                            tickangle=-35,
+                            categoryorder="array",
+                            categoryarray=x_labels,
+                        ),
+                        hovermode="x unified",
+                    )
+                    fig.add_hline(y=0.5, line_dash="dash", line_color="rgba(0,0,0,0.25)")
+                    st.plotly_chart(fig, width='stretch')
+                else:
+                    # Bar chart (histogram): combined TP + FP grouped bars
+                    fig = go.Figure()
+                    fig.add_trace(
                         go.Bar(
-                            x=d["bin_label"],
-                            y=d["tpr"],
-                            name=lbl,
-                            marker_color=RUN_COLORS[i % len(RUN_COLORS)],
-                            hovertemplate=f"{lbl}<br>%{{x}}<br>TP rate: %{{y:.2%}}<extra></extra>",
+                            x=x_labels,
+                            y=df_both["tpr"],
+                            name="TP rate",
+                            marker_color=RUN_COLORS[0],
+                            hovertemplate="%{x}<br>TP rate: %{y:.2%}<extra></extra>",
                         )
                     )
-                apply_chart_theme(fig_tpr, height=420)
-                fig_tpr.update_layout(
-                    title=f"TP rate by distance",
-                    xaxis_title="Distance bin",
-                    yaxis_title="TP rate",
-                    yaxis_range=[0, 1],
-                    barmode="group",
-                    xaxis=_xaxis_dist_bins,
-                    hovermode="x unified",
-                )
-                fig_tpr.add_hline(y=0.5, line_dash="dash", line_color="rgba(0,0,0,0.25)")
-                st.plotly_chart(fig_tpr, width='stretch')
-            else:
-                st.info("No TP rate by distance data.")
-
-            if not df_fpr_dist.empty:
-                fig_fpr = go.Figure()
-                for i, lbl in enumerate(run_labels_list):
-                    d = df_fpr_dist[df_fpr_dist["run"] == lbl].sort_values("bin_order")
-                    fig_fpr.add_trace(
+                    fig.add_trace(
                         go.Bar(
-                            x=d["bin_label"],
-                            y=d["fpr"],
-                            name=lbl,
-                            marker_color=RUN_COLORS[i % len(RUN_COLORS)],
-                            hovertemplate=f"{lbl}<br>%{{x}}<br>FP rate: %{{y:.2%}}<extra></extra>",
+                            x=x_labels,
+                            y=df_both["fpr"],
+                            name="FP rate",
+                            marker_color=RUN_COLORS[1],
+                            hovertemplate="%{x}<br>FP rate: %{y:.2%}<extra></extra>",
                         )
                     )
-                apply_chart_theme(fig_fpr, height=420)
-                fig_fpr.update_layout(
-                    title=f"FP rate by distance",
-                    xaxis_title="Distance bin",
-                    yaxis_title="FP rate",
-                    yaxis_range=[0, 1],
-                    barmode="group",
-                    xaxis=_xaxis_dist_bins,
-                    hovermode="x unified",
-                )
-                fig_fpr.add_hline(y=0.5, line_dash="dash", line_color="rgba(0,0,0,0.25)")
-                st.plotly_chart(fig_fpr, width='stretch')
+                    apply_chart_theme(fig, height=420)
+                    fig.update_layout(
+                        title=f"TP & FP rate by distance (within {max_eval_range} m)",
+                        xaxis_title="Distance bin",
+                        yaxis_title="Rate",
+                        yaxis_range=[0, 1],
+                        barmode="group",
+                        xaxis=dict(
+                            tickangle=-35,
+                            categoryorder="array",
+                            categoryarray=x_labels,
+                        ),
+                        hovermode="x unified",
+                    )
+                    fig.add_hline(y=0.5, line_dash="dash", line_color="rgba(0,0,0,0.25)")
+                    st.plotly_chart(fig, width='stretch')
             else:
-                st.info("No FP rate by distance data.")
-
-    # Object count by same distance bins as TP/FP; same line vs bar style; aligned x-axis
-
-    try:
-        if single_mode:
-            q_oc = f"""
-            SELECT ({_DIST_BIN_CASE}) AS distance_bin, label, COUNT(*) AS n
-            FROM view_eval_flat
-            WHERE {filter_clause_base}
-            GROUP BY 1, 2
-            """
-            df_oc = con.execute(q_oc).df()
+                st.info("No distance-bin data available.")
         else:
-            dfs_oc = []
+            # Compare mode: fetch TP and FP by distance per run
+            ds_dlog("distance: compare_mode n_runs=%s", len(runs))
+            dfs_tpr = []
             for i in range(len(runs)):
-                fc_oc = build_filter_clause(filters_list[i], enable_dist_h=False)
-                q_oc_i = f"""
-                SELECT ({_DIST_BIN_CASE}) AS distance_bin, COUNT(*) AS n
-                FROM {_flat_view(i)}
-                WHERE {fc_oc}
-                GROUP BY 1
-                """
-                df_oci = con.execute(q_oc_i).df()
-                df_oci["run"] = run_labels_list[i]
-                dfs_oc.append(df_oci)
-            df_oc = pd.concat(dfs_oc, ignore_index=True)
-
-        if df_oc.empty:
-            st.info("No object count data by distance bin.")
-        else:
-            df_oc = df_oc.copy()
-            df_oc["bin_order"], df_oc["bin_label"] = zip(*df_oc["distance_bin"].map(_distance_bin_order_and_label))
-            if rate_bin_labels_order:
-                align_x = list(rate_bin_labels_order)
-            else:
-                align_x = (
-                    df_oc.drop_duplicates("distance_bin")
+                fc = build_filter_clause(filters_list[i], enable_dist_h=False)
+                q = sql_distance_bin_rates_from_eval_flat(_flat_view(i), fc, metrics="tpr")
+                ds_dlog("distance: compare run %s/%s TPR by bin query", i + 1, len(runs))
+                df_i = con.execute(q).df()
+                ds_dlog("distance: compare TPR query run %s rows=%s", i, len(df_i))
+                df_i["run"] = run_labels_list[i]
+                bin_meta = [_distance_bin_order_and_label(b) for b in df_i["distance_bin"]]  # (order, label) pairs; safe for 0 rows, unlike unpacking zip(*...)
+                df_i["bin_order"], df_i["bin_label"] = [m[0] for m in bin_meta], [m[1] for m in bin_meta]
+                dfs_tpr.append(df_i.sort_values("bin_order"))
+            df_tpr_dist = pd.concat(dfs_tpr, ignore_index=True)
+            ds_dlog("distance: df_tpr_dist total_rows=%s", len(df_tpr_dist))
+    
+            dfs_fpr = []
+            for i in range(len(runs)):
+                fc = build_filter_clause(filters_list[i], enable_dist_h=False)
+                q = sql_distance_bin_rates_from_eval_flat(_flat_view(i), fc, metrics="fpr")
+                ds_dlog("distance: compare run %s/%s FPR by bin query", i + 1, len(runs))
+                df_i = con.execute(q).df()
+                ds_dlog("distance: compare FPR query run %s rows=%s", i, len(df_i))
+                df_i["run"] = run_labels_list[i]
+                bin_meta = [_distance_bin_order_and_label(b) for b in df_i["distance_bin"]]  # (order, label) pairs; safe for 0 rows, unlike unpacking zip(*...)
+                df_i["bin_order"], df_i["bin_label"] = [m[0] for m in bin_meta], [m[1] for m in bin_meta]
+                dfs_fpr.append(df_i.sort_values("bin_order"))
+            df_fpr_dist = pd.concat(dfs_fpr, ignore_index=True)
+    
+            if not df_tpr_dist.empty:
+                rate_bin_labels_order = (
+                    df_tpr_dist.drop_duplicates("bin_label")  # union of bins over all runs, so bins missing from run 0 are not dropped by the later reindex
                     .sort_values("bin_order")["bin_label"]
                     .tolist()
                 )
-
-            xaxis_oc = dict(tickangle=-35, categoryorder="array", categoryarray=align_x)
-
-            if single_mode:
-                pivot_oc = df_oc.pivot_table(
-                    index="bin_label", columns="label", values="n", aggfunc="sum", fill_value=0
-                )
-                pivot_oc = pivot_oc.reindex(align_x, fill_value=0)
-
-                fig_oc = go.Figure()
-                if use_line_chart:
-                    for j, lab in enumerate(pivot_oc.columns):
-                        c = RUN_COLORS[j % len(RUN_COLORS)]
+            _xaxis_dist_bins = (
+                dict(tickangle=-35, categoryorder="array", categoryarray=rate_bin_labels_order)
+                if rate_bin_labels_order
+                else dict(tickangle=-35)
+            )
+    
+            if use_line_chart:
+                if not df_tpr_dist.empty:
+                    fig_tpr = go.Figure()
+                    for i, lbl in enumerate(run_labels_list):
+                        d = df_tpr_dist[df_tpr_dist["run"] == lbl].sort_values("bin_order")
+                        c = RUN_COLORS[i % len(RUN_COLORS)]
                         r, g, b = int(c[1:3], 16), int(c[3:5], 16), int(c[5:7], 16)
-                        nm = str(lab)
-                        fig_oc.add_trace(
+                        fig_tpr.add_trace(
                             go.Scatter(
-                                x=align_x,
-                                y=pivot_oc[lab].values,
-                                name=nm,
+                                x=d["bin_label"],
+                                y=d["tpr"],
+                                name=lbl,
                                 mode="lines",
                                 line=dict(color=c, width=2.2, shape="spline"),
                                 fill="tozeroy",
-                                fillcolor=f"rgba({r},{g},{b},0.12)",
-                                hovertemplate=f"{nm}<br>%{{x}}<br>Count: %{{y:.0f}}<extra></extra>",
+                                fillcolor=f"rgba({r},{g},{b},0.15)",
+                                hovertemplate=f"{lbl}<br>%{{x}}<br>TP rate: %{{y:.2%}}<extra></extra>",
                             )
                         )
+                    apply_chart_theme(fig_tpr, height=420)
+                    fig_tpr.update_layout(
+                        title=f"TP rate by distance",
+                        xaxis_title="Distance bin",
+                        yaxis_title="TP rate",
+                        yaxis_range=[0, 1],
+                        xaxis=_xaxis_dist_bins,
+                        hovermode="x unified",
+                    )
+                    fig_tpr.add_hline(y=0.5, line_dash="dash", line_color="rgba(0,0,0,0.25)")
+                    st.plotly_chart(fig_tpr, width='stretch')
                 else:
-                    for j, lab in enumerate(pivot_oc.columns):
-                        c = RUN_COLORS[j % len(RUN_COLORS)]
-                        nm = str(lab)
-                        fig_oc.add_trace(
-                            go.Bar(
-                                x=align_x,
-                                y=pivot_oc[lab].values,
-                                name=nm,
-                                marker_color=c,
-                                hovertemplate=f"{nm}<br>%{{x}}<br>Count: %{{y:.0f}}<extra></extra>",
-                            )
-                        )
-                apply_chart_theme(fig_oc, height=420)
-                fig_oc.update_layout(
-                    title=f"Object count by distance bin (within {max_eval_range} m)",
-                    xaxis_title="Distance bin",
-                    yaxis_title="Count",
-                    xaxis=xaxis_oc,
-                    hovermode="x unified",
-                    **({"barmode": "group"} if not use_line_chart else {}),
-                )
-                st.plotly_chart(fig_oc, width='stretch')
-            else:
-                pivot_oc = df_oc.pivot_table(
-                    index="bin_label", columns="run", values="n", aggfunc="sum", fill_value=0
-                )
-                pivot_oc = pivot_oc.reindex(align_x, fill_value=0)
-                run_cols = [r for r in run_labels_list if r in pivot_oc.columns]
-
-                fig_oc = go.Figure()
-                if use_line_chart:
-                    for j, rl in enumerate(run_cols):
-                        c = RUN_COLORS[j % len(RUN_COLORS)]
+                    st.info("No TP rate by distance data.")
+    
+                if not df_fpr_dist.empty:
+                    fig_fpr = go.Figure()
+                    for i, lbl in enumerate(run_labels_list):
+                        d = df_fpr_dist[df_fpr_dist["run"] == lbl].sort_values("bin_order")
+                        c = RUN_COLORS[i % len(RUN_COLORS)]
                         r, g, b = int(c[1:3], 16), int(c[3:5], 16), int(c[5:7], 16)
-                        fig_oc.add_trace(
+                        fig_fpr.add_trace(
                             go.Scatter(
-                                x=align_x,
-                                y=pivot_oc[rl].values,
-                                name=str(rl),
+                                x=d["bin_label"],
+                                y=d["fpr"],
+                                name=lbl,
                                 mode="lines",
                                 line=dict(color=c, width=2.2, shape="spline"),
                                 fill="tozeroy",
                                 fillcolor=f"rgba({r},{g},{b},0.15)",
-                                hovertemplate=f"{rl}<br>%{{x}}<br>Count: %{{y:.0f}}<extra></extra>",
+                                hovertemplate=f"{lbl}<br>%{{x}}<br>FP rate: %{{y:.2%}}<extra></extra>",
                             )
                         )
+                    apply_chart_theme(fig_fpr, height=420)
+                    fig_fpr.update_layout(
+                        title=f"FP rate by distance",
+                        xaxis_title="Distance bin",
+                        yaxis_title="FP rate",
+                        yaxis_range=[0, 1],
+                        xaxis=_xaxis_dist_bins,
+                        hovermode="x unified",
+                    )
+                    fig_fpr.add_hline(y=0.5, line_dash="dash", line_color="rgba(0,0,0,0.25)")
+                    st.plotly_chart(fig_fpr, width='stretch')
                 else:
-                    for j, rl in enumerate(run_cols):
-                        c = RUN_COLORS[j % len(RUN_COLORS)]
-                        fig_oc.add_trace(
+                    st.info("No FP rate by distance data.")
+            else:
+                # Bar chart (histogram) for compare: TP then FP, grouped by run
+                if not df_tpr_dist.empty:
+                    fig_tpr = go.Figure()
+                    for i, lbl in enumerate(run_labels_list):
+                        d = df_tpr_dist[df_tpr_dist["run"] == lbl].sort_values("bin_order")
+                        fig_tpr.add_trace(
                             go.Bar(
-                                x=align_x,
-                                y=pivot_oc[rl].values,
-                                name=str(rl),
-                                marker_color=c,
-                                hovertemplate=f"{rl}<br>%{{x}}<br>Count: %{{y:.0f}}<extra></extra>",
+                                x=d["bin_label"],
+                                y=d["tpr"],
+                                name=lbl,
+                                marker_color=RUN_COLORS[i % len(RUN_COLORS)],
+                                hovertemplate=f"{lbl}<br>%{{x}}<br>TP rate: %{{y:.2%}}<extra></extra>",
                             )
                         )
-                apply_chart_theme(fig_oc, height=420)
-                fig_oc.update_layout(
-                    title=f"Object count by distance bin",
-                    xaxis_title="Distance bin",
-                    yaxis_title="Count",
-                    xaxis=xaxis_oc,
-                    hovermode="x unified",
-                    **({"barmode": "group"} if not use_line_chart else {}),
-                )
-                st.plotly_chart(fig_oc, width='stretch')
-    except Exception as e_oc:
-        st.error(f"Error (object count by distance bin): {e_oc}")
-
-except Exception as e:
-    st.error(f"Error: {e}")
-finally:
-    _dist_slot.empty()
-# =============================
-# Panel 2: TP Rate (single) / TP Rate Comparison (compare)
-# =============================
-st.markdown(
-    section_header_html(
-        "TP Rate" + (" Comparison" if not single_mode else ""),
-        "TP rate per object class (GT TP / (TP+FN)). Pick a chart style below.",
-    ),
-    unsafe_allow_html=True,
-)
-
-_tpr_query = """
-SELECT
-    label,
-    CASE
-        WHEN COUNT(*) FILTER (WHERE source='GT' AND status IN ('TP','FN')) > 0
-        THEN CAST(COUNT(*) FILTER (WHERE source='GT' AND status='TP') AS DOUBLE)
-             / COUNT(*) FILTER (WHERE source='GT' AND status IN ('TP','FN'))
-        ELSE 0
-    END AS tpr
-FROM {view}
-WHERE {filter_clause}
-GROUP BY label
-ORDER BY label
-"""
-
-# Compare-mode TP rate spider charts: several distance caps + no cap (sidebar range not used for this view)
-TPR_COMPARE_SPIDER_RANGES: List[Tuple[Optional[int], str]] = [
-    (50, "≤50 m"),
-    (80, "≤80 m"),
-    (100, "≤100 m"),
-    (120, "≤120 m"),
-    (150, "≤150 m"),
-    (None, "All distances"),
-]
-
-if single_mode:
-    tpr_viz = st.radio(
-        "TP rate chart style",
-        options=["Bar chart", "Lollipop (ranked)"],
-        index=0,
-        horizontal=True,
-        key="tpr_viz_single",
-    )
-    try:
-        with ds_spot_loading("TP rate"):
-            filter_clause = build_filter_clause(filters_base)
-            query = _tpr_query.format(view="view_eval_flat", filter_clause=filter_clause)
-            df_tpr_base = con.execute(query).df()
-        if not df_tpr_base.empty:
-            title = f"Total TP rate within {max_eval_range} [m]"
-            if tpr_viz == "Bar chart":
-                fig = px.bar(
-                    df_tpr_base,
-                    x="label",
-                    y="tpr",
-                    title=title,
-                    labels={"tpr": "TP Rate", "label": "Label"},
-                )
-                apply_chart_theme(fig)
-                fig.update_layout(yaxis_range=[0, 1.2])
-                fig.add_hline(y=0.5, line_dash="dash", line_color="rgba(0,0,0,0.2)")
-                st.plotly_chart(fig, width='stretch')
+                    apply_chart_theme(fig_tpr, height=420)
+                    fig_tpr.update_layout(
+                        title=f"TP rate by distance",
+                        xaxis_title="Distance bin",
+                        yaxis_title="TP rate",
+                        yaxis_range=[0, 1],
+                        barmode="group",
+                        xaxis=_xaxis_dist_bins,
+                        hovermode="x unified",
+                    )
+                    fig_tpr.add_hline(y=0.5, line_dash="dash", line_color="rgba(0,0,0,0.25)")
+                    st.plotly_chart(fig_tpr, width='stretch')
+                else:
+                    st.info("No TP rate by distance data.")
+    
+                if not df_fpr_dist.empty:
+                    fig_fpr = go.Figure()
+                    for i, lbl in enumerate(run_labels_list):
+                        d = df_fpr_dist[df_fpr_dist["run"] == lbl].sort_values("bin_order")
+                        fig_fpr.add_trace(
+                            go.Bar(
+                                x=d["bin_label"],
+                                y=d["fpr"],
+                                name=lbl,
+                                marker_color=RUN_COLORS[i % len(RUN_COLORS)],
+                                hovertemplate=f"{lbl}<br>%{{x}}<br>FP rate: %{{y:.2%}}<extra></extra>",
+                            )
+                        )
+                    apply_chart_theme(fig_fpr, height=420)
+                    fig_fpr.update_layout(
+                        title=f"FP rate by distance",
+                        xaxis_title="Distance bin",
+                        yaxis_title="FP rate",
+                        yaxis_range=[0, 1],
+                        barmode="group",
+                        xaxis=_xaxis_dist_bins,
+                        hovermode="x unified",
+                    )
+                    fig_fpr.add_hline(y=0.5, line_dash="dash", line_color="rgba(0,0,0,0.25)")
+                    st.plotly_chart(fig_fpr, width='stretch')
+                else:
+                    st.info("No FP rate by distance data.")
+    
+        # Object count by same distance bins as TP/FP; same line vs bar style; aligned x-axis
+    
+        try:
+            if single_mode:
+                q_oc = f"""
+                SELECT ({_DIST_BIN_CASE}) AS distance_bin, label, COUNT(*) AS n
+                FROM view_eval_flat
+                WHERE {filter_clause_base}
+                GROUP BY 1, 2
+                """
+                df_oc = con.execute(q_oc).df()
             else:
-                fig = _tpr_lollipop_single(df_tpr_base, title)
-                st.plotly_chart(fig, width='stretch')
-        else:
-            st.info("No data available")
-    except Exception as e:
-        st.error(f"Error: {e}")
-else:
-    tpr_opts = ["Spider chart", "Grouped bar", "Heatmap (label × run)", "Line profile"]
-    tpr_viz = st.radio(
-        "TP rate chart style",
-        options=tpr_opts,
-        index=0,
-        horizontal=True,
-        key="tpr_viz_compare",
-    )
-    try:
-        with ds_spot_loading("TP rate"):
-            dfs_tpr = []
-            for i in range(len(runs)):
-                fc = build_filter_clause(filters_list[i])
-                q = _tpr_query.format(view=_flat_view(i), filter_clause=fc)
-                df_i = con.execute(q).df()
-                df_i["run"] = run_labels_list[i]
-                dfs_tpr.append(df_i)
-            df_tpr_all = pd.concat(dfs_tpr, ignore_index=True)
-        if tpr_viz == "Spider chart":
-            st.caption(
-                "Six spider charts use **fixed distance cutoffs** (50–150 m) plus **all distances**. "
-                "Topic / label / suite / visibility filters still apply. "
-                "Other chart types and the rest of the page use the sidebar **Max Evaluation Range**."
-            )
-            fb_all = {**filters_base, "max_eval_range": None}
-            label_union: set = set()
-            for i in range(len(runs)):
-                fc_a = build_filter_clause(fb_all)
-                q_a = _tpr_query.format(view=_flat_view(i), filter_clause=fc_a)
-                dfa = con.execute(q_a).df()
-                label_union |= set(dfa["label"].astype(str))
-            cats = sorted(label_union)
-            if not cats:
-                st.info("No TP rate data for any distance range with current filters.")
+                dfs_oc = []
+                for i in range(len(runs)):
+                    fc_oc = build_filter_clause(filters_list[i], enable_dist_h=False)
+                    q_oc_i = f"""
+                    SELECT ({_DIST_BIN_CASE}) AS distance_bin, COUNT(*) AS n
+                    FROM {_flat_view(i)}
+                    WHERE {fc_oc}
+                    GROUP BY 1
+                    """
+                    df_oci = con.execute(q_oc_i).df()
+                    df_oci["run"] = run_labels_list[i]
+                    dfs_oc.append(df_oci)
+                df_oc = pd.concat(dfs_oc, ignore_index=True)
+    
+            if df_oc.empty:
+                st.info("No object count data by distance bin.")
             else:
-                if len(cats) > 16:
-                    st.caption("Spider charts work best with ≤16 labels; many classes may look crowded.")
-                for row_start in range(0, len(TPR_COMPARE_SPIDER_RANGES), 3):
-                    row_ranges = TPR_COMPARE_SPIDER_RANGES[row_start : row_start + 3]
-                    cols = st.columns(len(row_ranges))
-                    for col, (max_r, cap_lbl) in zip(cols, row_ranges):
-                        fb = {**filters_base, "max_eval_range": max_r}
-                        dfs_slice = []
-                        for i in range(len(runs)):
-                            fc = build_filter_clause(fb)
-                            q = _tpr_query.format(view=_flat_view(i), filter_clause=fc)
-                            dfi = con.execute(q).df()
-                            dfi["run"] = run_labels_list[i]
-                            dfs_slice.append(dfi)
-                        df_slice = pd.concat(dfs_slice, ignore_index=True)
-                        with col:
-                            if df_slice.empty:
-                                st.info(f"No data ({cap_lbl}).")
-                            else:
-                                fig = _tpr_spider_compare(
-                                    df_slice,
-                                    cats,
-                                    f"TP rate ({cap_lbl})",
-                                    run_labels_list,
-                                    height=360,
+                df_oc = df_oc.copy()
+                df_oc["bin_order"], df_oc["bin_label"] = zip(*df_oc["distance_bin"].map(_distance_bin_order_and_label))
+                if rate_bin_labels_order:
+                    align_x = list(rate_bin_labels_order)
+                else:
+                    align_x = (
+                        df_oc.drop_duplicates("distance_bin")
+                        .sort_values("bin_order")["bin_label"]
+                        .tolist()
+                    )
+    
+                xaxis_oc = dict(tickangle=-35, categoryorder="array", categoryarray=align_x)
+    
+                if single_mode:
+                    pivot_oc = df_oc.pivot_table(
+                        index="bin_label", columns="label", values="n", aggfunc="sum", fill_value=0
+                    )
+                    pivot_oc = pivot_oc.reindex(align_x, fill_value=0)
+    
+                    fig_oc = go.Figure()
+                    if use_line_chart:
+                        for j, lab in enumerate(pivot_oc.columns):
+                            c = RUN_COLORS[j % len(RUN_COLORS)]
+                            r, g, b = int(c[1:3], 16), int(c[3:5], 16), int(c[5:7], 16)
+                            nm = str(lab)
+                            fig_oc.add_trace(
+                                go.Scatter(
+                                    x=align_x,
+                                    y=pivot_oc[lab].values,
+                                    name=nm,
+                                    mode="lines",
+                                    line=dict(color=c, width=2.2, shape="spline"),
+                                    fill="tozeroy",
+                                    fillcolor=f"rgba({r},{g},{b},0.12)",
+                                    hovertemplate=f"{nm}<br>%{{x}}<br>Count: %{{y:.0f}}<extra></extra>",
                                 )
-                                st.plotly_chart(fig, width='stretch')
-        elif not df_tpr_all.empty:
-            title = f"Total TP rate within {max_eval_range} [m] by run"
-            if tpr_viz == "Grouped bar":
-                fig = px.bar(
-                    df_tpr_all,
-                    x="label",
-                    y="tpr",
-                    color="run",
-                    barmode="group",
-                    title=title,
-                    labels={"tpr": "TP Rate", "label": "Label", "run": "Run"},
-                    color_discrete_sequence=RUN_COLORS,
-                )
-                apply_chart_theme(fig)
-                fig.update_layout(yaxis_range=[0, 1.2])
-                fig.add_hline(y=0.5, line_dash="dash", line_color="rgba(0,0,0,0.2)")
-                st.plotly_chart(fig, width='stretch')
-            elif tpr_viz == "Heatmap (label × run)":
-                pivot = df_tpr_all.pivot_table(index="label", columns="run", values="tpr", aggfunc="first")
-                cols_present = [c for c in run_labels_list if c in pivot.columns]
-                if cols_present:
-                    pivot = pivot[cols_present]
-                fig = px.imshow(
-                    pivot,
-                    labels=dict(x="Run", y="Label", color="TP rate"),
-                    title=title,
-                    color_continuous_scale="RdYlGn",
-                    zmin=0,
-                    zmax=1,
-                    aspect="auto",
-                )
-                apply_chart_theme(fig, height=max(360, 32 + 22 * len(pivot.index)))
-                fig.update_layout(xaxis_side="top")
-                st.plotly_chart(fig, width='stretch')
-            elif tpr_viz == "Line profile":
-                fig = px.line(
-                    df_tpr_all,
-                    x="label",
-                    y="tpr",
-                    color="run",
-                    markers=True,
-                    title=title,
-                    labels={"tpr": "TP Rate", "label": "Label", "run": "Run"},
-                    color_discrete_sequence=RUN_COLORS,
-                )
-                fig.update_traces(line=dict(width=2.5), marker=dict(size=8))
-                apply_chart_theme(fig, height=400)
-                fig.update_layout(yaxis_range=[0, 1.15], xaxis_tickangle=-35, hovermode="x unified")
-                fig.add_hline(y=0.5, line_dash="dash", line_color="rgba(0,0,0,0.2)")
-                st.plotly_chart(fig, width='stretch')
-        else:
-            st.info("No data available")
+                            )
+                    else:
+                        for j, lab in enumerate(pivot_oc.columns):
+                            c = RUN_COLORS[j % len(RUN_COLORS)]
+                            nm = str(lab)
+                            fig_oc.add_trace(
+                                go.Bar(
+                                    x=align_x,
+                                    y=pivot_oc[lab].values,
+                                    name=nm,
+                                    marker_color=c,
+                                    hovertemplate=f"{nm}<br>%{{x}}<br>Count: %{{y:.0f}}<extra></extra>",
+                                )
+                            )
+                    apply_chart_theme(fig_oc, height=420)
+                    fig_oc.update_layout(
+                        title=f"Object count by distance bin (within {max_eval_range} m)",
+                        xaxis_title="Distance bin",
+                        yaxis_title="Count",
+                        xaxis=xaxis_oc,
+                        hovermode="x unified",
+                        **({"barmode": "group"} if not use_line_chart else {}),
+                    )
+                    st.plotly_chart(fig_oc, width='stretch')
+                else:
+                    pivot_oc = df_oc.pivot_table(
+                        index="bin_label", columns="run", values="n", aggfunc="sum", fill_value=0
+                    )
+                    pivot_oc = pivot_oc.reindex(align_x, fill_value=0)
+                    run_cols = [r for r in run_labels_list if r in pivot_oc.columns]
+    
+                    fig_oc = go.Figure()
+                    if use_line_chart:
+                        for j, rl in enumerate(run_cols):
+                            c = RUN_COLORS[j % len(RUN_COLORS)]
+                            r, g, b = int(c[1:3], 16), int(c[3:5], 16), int(c[5:7], 16)
+                            fig_oc.add_trace(
+                                go.Scatter(
+                                    x=align_x,
+                                    y=pivot_oc[rl].values,
+                                    name=str(rl),
+                                    mode="lines",
+                                    line=dict(color=c, width=2.2, shape="spline"),
+                                    fill="tozeroy",
+                                    fillcolor=f"rgba({r},{g},{b},0.15)",
+                                    hovertemplate=f"{rl}<br>%{{x}}<br>Count: %{{y:.0f}}<extra></extra>",
+                                )
+                            )
+                    else:
+                        for j, rl in enumerate(run_cols):
+                            c = RUN_COLORS[j % len(RUN_COLORS)]
+                            fig_oc.add_trace(
+                                go.Bar(
+                                    x=align_x,
+                                    y=pivot_oc[rl].values,
+                                    name=str(rl),
+                                    marker_color=c,
+                                    hovertemplate=f"{rl}<br>%{{x}}<br>Count: %{{y:.0f}}<extra></extra>",
+                                )
+                            )
+                    apply_chart_theme(fig_oc, height=420)
+                    fig_oc.update_layout(
+                        title=f"Object count by distance bin",
+                        xaxis_title="Distance bin",
+                        yaxis_title="Count",
+                        xaxis=xaxis_oc,
+                        hovermode="x unified",
+                        **({"barmode": "group"} if not use_line_chart else {}),
+                    )
+                    st.plotly_chart(fig_oc, width='stretch')
+        except Exception as e_oc:
+            st.error(f"Error (object count by distance bin): {e_oc}")
+    
     except Exception as e:
         st.error(f"Error: {e}")
-# =============================
-# Panel 5: Perception diff vs baseline A (compare mode only)
-# =============================
-def _baobab_hierarchy_from_objects(
-    df_obj: pd.DataFrame,
-    change_type: str,
-    root_label: str,
-    max_scenarios: int,
-    max_frames: int,
-) -> pd.DataFrame:
-    """
-    Build a leaf table for Plotly sunburst/treemap: root → scenario → frame → label.
-    Caps scenarios and frames per scenario; merges the rest into Other buckets.
-    """
-    if df_obj.empty or "change_type" not in df_obj.columns:
-        return pd.DataFrame()
-    sub = df_obj[df_obj["change_type"] == change_type].copy()
-    if sub.empty:
-        return pd.DataFrame()
-    sub["scenario_name"] = sub["scenario_name"].fillna("").astype(str).replace("", "(no scenario)")
-    sub["label"] = sub["label"].fillna("").astype(str).replace("", "(no label)")
-    sub["frame_key"] = (
-        sub["t4dataset_id"].astype(str) + "|f" + sub["frame_index"].astype(str)
-    )
-    leaf = (
-        sub.groupby(["scenario_name", "frame_key", "label"], dropna=False)
-        .size()
-        .reset_index(name="n")
-    )
-    if leaf.empty:
-        return pd.DataFrame()
-    ms = max(int(max_scenarios), 1)
-    mf = max(int(max_frames), 1)
-    scen_tot = leaf.groupby("scenario_name")["n"].sum().sort_values(ascending=False)
-    top_scen = set(scen_tot.head(ms).index)
-    leaf["scen_g"] = np.where(
-        leaf["scenario_name"].isin(top_scen),
-        leaf["scenario_name"],
-        "Other scenarios",
-    )
-    parts = []
-    for _, g in leaf.groupby("scen_g"):
-        fr_tot = g.groupby("frame_key")["n"].sum().sort_values(ascending=False)
-        top_fr = set(fr_tot.head(mf).index)
-        g2 = g.copy()
-        g2["fr_g"] = np.where(g2["frame_key"].isin(top_fr), g2["frame_key"], "Other frames")
-        agg = g2.groupby(["scen_g", "fr_g", "label"], as_index=False)["n"].sum()
-        parts.append(agg)
-    out = pd.concat(parts, ignore_index=True)
-    out["root"] = root_label
-
-    def _frame_ring_label(fr_g: str, scen_g: str) -> str:
-        if fr_g == "Other frames" or str(fr_g) == "Other frames":
-            return "Other frames"
-        sfg = str(fr_g)
-        if "|f" not in sfg:
-            return sfg
-        fid = sfg.split("|f", 1)[-1]
-        if scen_g == "Other scenarios":
-            t4 = sfg.split("|f", 1)[0]
-            t4s = t4 if len(t4) <= 14 else ("…" + t4[-12:])
-            return f"{t4s}|f{fid}"
-        return f"f{fid}"
-
-    out["fr_display"] = out.apply(
-        lambda r: _frame_ring_label(r["fr_g"], r["scen_g"]), axis=1
-    )
-    return out
-
-
-def _comparison_lens_treemap_df(
-    names: pd.Series,
-    improved: pd.Series,
-    degraded: pd.Series,
-    root_title: str,
-) -> pd.DataFrame:
-    """Rows for px.treemap path root → Improved|Degraded → item (area = n)."""
-    rows = []
-    for i in range(len(names)):
-        nm = str(names.iloc[i]).strip() or "—"
-        if len(nm) > 72:
-            nm = nm[:69] + "…"
-        ip = float(improved.iloc[i]) if pd.notna(improved.iloc[i]) else 0.0
-        dg = float(degraded.iloc[i]) if pd.notna(degraded.iloc[i]) else 0.0
-        if ip > 0:
-            rows.append(
-                {"root": root_title, "side": "Improved", "item": nm, "n": ip}
-            )
-        if dg > 0:
-            rows.append(
-                {"root": root_title, "side": "Degraded", "item": nm, "n": dg}
-            )
-    return pd.DataFrame(rows)
-
-
-def _plot_comparison_lens_treemap(
-    tdf: pd.DataFrame,
-    st_key: str,
-    title: str,
-) -> None:
-    if tdf is None or tdf.empty:
-        st.caption("_No data for this view._")
-        return
-    fig = px.treemap(
-        tdf,
-        path=["root", "side", "item"],
-        values="n",
-        color="side",
-        color_discrete_map={"Improved": IMPROVED_COLOR, "Degraded": DEGRADED_COLOR},
-    )
-    fig.update_traces(
-        textfont_size=12,
-        textinfo="label+value+percent parent",
-        hovertemplate=(
-            "<b>%{label}</b><br>"
-            "GT objects: %{value:.0f}<br>"
-            "% of parent: %{percentParent}<extra></extra>"
-        ),
-        marker_line_width=1.5,
-        marker_line_color="rgba(255,255,255,0.45)",
-        root_color="rgba(240,240,245,0.95)",
-    )
-    _title_layout = {**PLOTLY_LAYOUT_THEME["title"], "text": title}
-    apply_chart_theme(
-        fig,
-        height=430,
-        margin=dict(t=20, l=2, r=2, b=2),
-        paper_bgcolor="rgba(0,0,0,0)",
-        title=_title_layout,
-    )
-    st.plotly_chart(fig, width='stretch', key=st_key)
-
-
-if not single_mode:
-    st.divider()
+    finally:
+        _dist_slot.empty()
+    ds_dlog("section: Panel3_5_Distance_end")
+    # =============================
+    # Panel 2: TP Rate (single) / TP Rate Comparison (compare)
+    # =============================
+    ds_dlog("section: Panel2_TP_Rate_start")
     st.markdown(
         section_header_html(
-            "Perception diff (vs baseline A)",
-            "Per-GT-object comparison vs baseline A: degraded = was TP on A and FN on candidate; improved = was FN on A and TP on candidate. Hotspots prioritize regressions.",
+            "TP Rate" + (" Comparison" if not single_mode else ""),
+            "TP rate per object class (GT TP / (TP+FN)). Pick a chart style below.",
         ),
         unsafe_allow_html=True,
     )
-    for idx in range(1, len(runs)):
-        lbl = run_labels_list[idx]
-        _pd_slot = st.empty()
-        _pd_slot.markdown(ds_spot_loading_markup(f"Perception diff · run {lbl}"), unsafe_allow_html=True)
+    
+    _tpr_query = """
+    SELECT
+        label,
+        CASE
+            WHEN COUNT(*) FILTER (WHERE source='GT' AND status IN ('TP','FN')) > 0
+            THEN CAST(COUNT(*) FILTER (WHERE source='GT' AND status='TP') AS DOUBLE)
+                 / COUNT(*) FILTER (WHERE source='GT' AND status IN ('TP','FN'))
+            ELSE 0
+        END AS tpr
+    FROM {view}
+    WHERE {filter_clause}
+    GROUP BY label
+    ORDER BY label
+    """  # str.format template: {view} and {filter_clause} are filled in per query before execution
+    
+    # Compare-mode TP rate spider charts, one per entry: (distance cap in m, or None for no cap; caption label). The sidebar range is not used for this view.
+    TPR_COMPARE_SPIDER_RANGES: List[Tuple[Optional[int], str]] = [
+        (50, "≤50 m"),
+        (80, "≤80 m"),
+        (100, "≤100 m"),
+        (120, "≤120 m"),
+        (150, "≤150 m"),
+        (None, "All distances"),
+    ]
+    
+    if single_mode:  # single run: one chart built from view_eval_flat with filters_base
+        tpr_viz = st.radio(
+            "TP rate chart style",
+            options=["Bar chart", "Lollipop (ranked)"],
+            index=0,
+            horizontal=True,
+            key="tpr_viz_single",
+        )
         try:
-            filter_clause_comp_p5 = build_filter_clause(filters_list[idx], enable_dist_h=False)
-            comp_flat = _flat_view(idx)
-            query = f"""
-            WITH base_gt AS (
-                SELECT
-                    t4dataset_id,
-                    frame_index,
-                    uuid AS gt_uuid,
-                    COUNT(*) FILTER (WHERE status = 'TP') > 0 AS tp_base,
-                    COALESCE(MAX(try_cast(suite_name AS VARCHAR)), '') AS suite_name,
-                    COALESCE(MAX(try_cast(scenario_name AS VARCHAR)), '') AS scenario_name,
-                    COALESCE(MAX(try_cast(t4dataset_name AS VARCHAR)), '') AS t4dataset_name
-                FROM view_eval_flat
-                WHERE source = 'GT' AND uuid IS NOT NULL AND frame_index IS NOT NULL
-                    AND {filter_clause_base}
-                GROUP BY 1,2,3
+            with ds_spot_loading("TP rate"):
+                filter_clause = build_filter_clause(filters_base)
+                query = _tpr_query.format(view="view_eval_flat", filter_clause=filter_clause)
+                df_tpr_base = con.execute(query).df()
+            if not df_tpr_base.empty:
+                title = f"Total TP rate within {max_eval_range} [m]"
+                if tpr_viz == "Bar chart":
+                    fig = px.bar(
+                        df_tpr_base,
+                        x="label",
+                        y="tpr",
+                        title=title,
+                        labels={"tpr": "TP Rate", "label": "Label"},
+                    )
+                    apply_chart_theme(fig)
+                    fig.update_layout(yaxis_range=[0, 1.2])  # tpr is a ratio (at most 1.0); leave headroom above it
+                    fig.add_hline(y=0.5, line_dash="dash", line_color="rgba(0,0,0,0.2)")  # dashed reference line at a TP rate of 0.5
+                    st.plotly_chart(fig, width='stretch')
+                else:
+                    fig = _tpr_lollipop_single(df_tpr_base, title)
+                    st.plotly_chart(fig, width='stretch')
+            else:
+                st.info("No data available")
+        except Exception as e:  # any query/plot failure is reported in the UI via st.error
+            st.error(f"Error: {e}")
+    else:  # compare mode: one TP-rate result set per run, tagged with its run label
+        tpr_opts = ["Spider chart", "Grouped bar", "Heatmap (label × run)", "Line profile"]
+        tpr_viz = st.radio(
+            "TP rate chart style",
+            options=tpr_opts,
+            index=0,
+            horizontal=True,
+            key="tpr_viz_compare",
+        )
+        try:
+            with ds_spot_loading("TP rate"):
+                dfs_tpr = []
+                for i in range(len(runs)):
+                    fc = build_filter_clause(filters_list[i])
+                    q = _tpr_query.format(view=_flat_view(i), filter_clause=fc)
+                    df_i = con.execute(q).df()
+                    df_i["run"] = run_labels_list[i]
+                    dfs_tpr.append(df_i)
+                df_tpr_all = pd.concat(dfs_tpr, ignore_index=True)  # long format: one row per (label, run)
+            if tpr_viz == "Spider chart":  # this branch re-queries per fixed range; df_tpr_all is not used here
+                st.caption(
+                    "Six spider charts use **fixed distance cutoffs** (50–150 m) plus **all distances**. "
+                    "Topic / label / suite / visibility filters still apply. "
+                    "Other chart types and the rest of the page use the sidebar **Max Evaluation Range**."
+                )
+                fb_all = {**filters_base, "max_eval_range": None}  # None pairs with the "All distances" entry; presumably means no distance cap
+                label_union: set = set()
+                for i in range(len(runs)):
+                    fc_a = build_filter_clause(fb_all)  # NOTE(review): loop-invariant, and built from filters_base rather than filters_list[i] as in the loop above; confirm intended
+                    q_a = _tpr_query.format(view=_flat_view(i), filter_clause=fc_a)
+                    dfa = con.execute(q_a).df()
+                    label_union |= set(dfa["label"].astype(str))
+                cats = sorted(label_union)  # the same sorted label list is passed to every spider chart
+                if not cats:
+                    st.info("No TP rate data for any distance range with current filters.")
+                else:
+                    if len(cats) > 16:
+                        st.caption("Spider charts work best with ≤16 labels; many classes may look crowded.")
+                    for row_start in range(0, len(TPR_COMPARE_SPIDER_RANGES), 3):  # lay the charts out three per row
+                        row_ranges = TPR_COMPARE_SPIDER_RANGES[row_start : row_start + 3]
+                        cols = st.columns(len(row_ranges))
+                        for col, (max_r, cap_lbl) in zip(cols, row_ranges):
+                            fb = {**filters_base, "max_eval_range": max_r}  # override only the distance cap for this chart
+                            dfs_slice = []
+                            for i in range(len(runs)):
+                                fc = build_filter_clause(fb)  # NOTE(review): loop-invariant, and uses filters_base for every run; confirm intended
+                                q = _tpr_query.format(view=_flat_view(i), filter_clause=fc)
+                                dfi = con.execute(q).df()
+                                dfi["run"] = run_labels_list[i]
+                                dfs_slice.append(dfi)
+                            df_slice = pd.concat(dfs_slice, ignore_index=True)
+                            with col:
+                                if df_slice.empty:
+                                    st.info(f"No data ({cap_lbl}).")
+                                else:
+                                    fig = _tpr_spider_compare(
+                                        df_slice,
+                                        cats,
+                                        f"TP rate ({cap_lbl})",
+                                        run_labels_list,
+                                        height=360,
+                                    )
+                                    st.plotly_chart(fig, width='stretch')
+            elif not df_tpr_all.empty:  # remaining chart styles all plot df_tpr_all (sidebar range)
+                title = f"Total TP rate within {max_eval_range} [m] by run"
+                if tpr_viz == "Grouped bar":
+                    fig = px.bar(
+                        df_tpr_all,
+                        x="label",
+                        y="tpr",
+                        color="run",
+                        barmode="group",
+                        title=title,
+                        labels={"tpr": "TP Rate", "label": "Label", "run": "Run"},
+                        color_discrete_sequence=RUN_COLORS,
+                    )
+                    apply_chart_theme(fig)
+                    fig.update_layout(yaxis_range=[0, 1.2])
+                    fig.add_hline(y=0.5, line_dash="dash", line_color="rgba(0,0,0,0.2)")
+                    st.plotly_chart(fig, width='stretch')
+                elif tpr_viz == "Heatmap (label × run)":
+                    pivot = df_tpr_all.pivot_table(index="label", columns="run", values="tpr", aggfunc="first")  # label × run matrix of TP rates
+                    cols_present = [c for c in run_labels_list if c in pivot.columns]  # keep run columns in run_labels_list order
+                    if cols_present:
+                        pivot = pivot[cols_present]
+                    fig = px.imshow(
+                        pivot,
+                        labels=dict(x="Run", y="Label", color="TP rate"),
+                        title=title,
+                        color_continuous_scale="RdYlGn",
+                        zmin=0,
+                        zmax=1,
+                        aspect="auto",
+                    )
+                    apply_chart_theme(fig, height=max(360, 32 + 22 * len(pivot.index)))  # height grows with the number of labels, minimum 360
+                    fig.update_layout(xaxis_side="top")
+                    st.plotly_chart(fig, width='stretch')
+                elif tpr_viz == "Line profile":
+                    fig = px.line(
+                        df_tpr_all,
+                        x="label",
+                        y="tpr",
+                        color="run",
+                        markers=True,
+                        title=title,
+                        labels={"tpr": "TP Rate", "label": "Label", "run": "Run"},
+                        color_discrete_sequence=RUN_COLORS,
+                    )
+                    fig.update_traces(line=dict(width=2.5), marker=dict(size=8))
+                    apply_chart_theme(fig, height=400)
+                    fig.update_layout(yaxis_range=[0, 1.15], xaxis_tickangle=-35, hovermode="x unified")
+                    fig.add_hline(y=0.5, line_dash="dash", line_color="rgba(0,0,0,0.2)")
+                    st.plotly_chart(fig, width='stretch')
+            else:
+                st.info("No data available")
+        except Exception as e:  # any query/plot failure is reported in the UI via st.error
+            st.error(f"Error: {e}")
+    # =============================
+    # Panel 5: Perception diff vs baseline A (compare mode only)
+    # =============================
+    def _baobab_hierarchy_from_objects(
+        df_obj: pd.DataFrame,
+        change_type: str,
+        root_label: str,
+        max_scenarios: int,
+        max_frames: int,
+    ) -> pd.DataFrame:
+        """
+        Build a leaf table for Plotly sunburst/treemap: root → scenario → frame → label, counting only rows whose ``change_type`` matches (empty DataFrame when none do).
+        Caps scenarios and frames per scenario (largest counts kept, each cap at least 1); merges the rest into "Other scenarios" / "Other frames" buckets.
+        """
+        if df_obj.empty or "change_type" not in df_obj.columns:  # nothing to classify
+            return pd.DataFrame()
+        sub = df_obj[df_obj["change_type"] == change_type].copy()
+        if sub.empty:
+            return pd.DataFrame()
+        sub["scenario_name"] = sub["scenario_name"].fillna("").astype(str).replace("", "(no scenario)")  # missing/blank names get a visible placeholder
+        sub["label"] = sub["label"].fillna("").astype(str).replace("", "(no label)")
+        sub["frame_key"] = (  # "<t4dataset_id>|f<frame_index>"
+            sub["t4dataset_id"].astype(str) + "|f" + sub["frame_index"].astype(str)
+        )
+        leaf = (  # n = number of rows per (scenario, frame, label)
+            sub.groupby(["scenario_name", "frame_key", "label"], dropna=False)
+            .size()
+            .reset_index(name="n")
+        )
+        if leaf.empty:
+            return pd.DataFrame()
+        ms = max(int(max_scenarios), 1)  # caps are never below 1
+        mf = max(int(max_frames), 1)
+        scen_tot = leaf.groupby("scenario_name")["n"].sum().sort_values(ascending=False)  # largest scenarios first
+        top_scen = set(scen_tot.head(ms).index)
+        leaf["scen_g"] = np.where(
+            leaf["scenario_name"].isin(top_scen),
+            leaf["scenario_name"],
+            "Other scenarios",
+        )
+        parts = []
+        for _, g in leaf.groupby("scen_g"):  # the frame cap is applied independently inside each scenario group
+            fr_tot = g.groupby("frame_key")["n"].sum().sort_values(ascending=False)
+            top_fr = set(fr_tot.head(mf).index)
+            g2 = g.copy()
+            g2["fr_g"] = np.where(g2["frame_key"].isin(top_fr), g2["frame_key"], "Other frames")
+            agg = g2.groupby(["scen_g", "fr_g", "label"], as_index=False)["n"].sum()
+            parts.append(agg)
+        out = pd.concat(parts, ignore_index=True)
+        out["root"] = root_label
+    
+        def _frame_ring_label(fr_g: str, scen_g: str) -> str:  # display label for a frame node
+            if fr_g == "Other frames" or str(fr_g) == "Other frames":  # NOTE(review): the first comparison looks redundant with the str() one
+                return "Other frames"
+            sfg = str(fr_g)
+            if "|f" not in sfg:
+                return sfg
+            fid = sfg.split("|f", 1)[-1]
+            if scen_g == "Other scenarios":  # merged bucket: keep the dataset id prefix, shortened when longer than 14 chars
+                t4 = sfg.split("|f", 1)[0]
+                t4s = t4 if len(t4) <= 14 else ("…" + t4[-12:])
+                return f"{t4s}|f{fid}"
+            return f"f{fid}"  # inside a named scenario only the frame id is shown
+    
+        out["fr_display"] = out.apply(
+            lambda r: _frame_ring_label(r["fr_g"], r["scen_g"]), axis=1
+        )
+        return out
+    
+    
+    def _comparison_lens_treemap_df(
+        names: pd.Series,
+        improved: pd.Series,
+        degraded: pd.Series,
+        root_title: str,
+    ) -> pd.DataFrame:
+        """Rows for px.treemap path root → Improved|Degraded → item (area = n); a side with a zero or missing count gets no row."""
+        rows = []
+        for i in range(len(names)):  # the three Series are read positionally via .iloc, so they are assumed to be aligned and equally long
+            nm = str(names.iloc[i]).strip() or "—"  # blank names fall back to an em dash
+            if len(nm) > 72:  # shorten long names to 69 chars plus an ellipsis
+                nm = nm[:69] + "…"
+            ip = float(improved.iloc[i]) if pd.notna(improved.iloc[i]) else 0.0  # missing counts are treated as 0
+            dg = float(degraded.iloc[i]) if pd.notna(degraded.iloc[i]) else 0.0
+            if ip > 0:
+                rows.append(
+                    {"root": root_title, "side": "Improved", "item": nm, "n": ip}
+                )
+            if dg > 0:
+                rows.append(
+                    {"root": root_title, "side": "Degraded", "item": nm, "n": dg}
+                )
+        return pd.DataFrame(rows)
+    
+    
+    def _plot_comparison_lens_treemap(
+        tdf: pd.DataFrame,
+        st_key: str,
+        title: str,
+    ) -> None:  # draws the treemap into the Streamlit page, or a caption when there is no data
+        if tdf is None or tdf.empty:  # nothing to plot
+            st.caption("_No data for this view._")
+            return
+        fig = px.treemap(
+            tdf,
+            path=["root", "side", "item"],
+            values="n",
+            color="side",
+            color_discrete_map={"Improved": IMPROVED_COLOR, "Degraded": DEGRADED_COLOR},
+        )
+        fig.update_traces(
+            textfont_size=12,
+            textinfo="label+value+percent parent",
+            hovertemplate=(
+                "<b>%{label}</b><br>"
+                "GT objects: %{value:.0f}<br>"
+                "% of parent: %{percentParent}<extra></extra>"
             ),
-            comp_gt AS (
-                SELECT
-                    t4dataset_id,
-                    frame_index,
-                    uuid AS gt_uuid,
-                    COUNT(*) FILTER (WHERE status = 'TP') > 0 AS tp_comp,
-                    COALESCE(MAX(try_cast(suite_name AS VARCHAR)), '') AS suite_name,
-                    COALESCE(MAX(try_cast(scenario_name AS VARCHAR)), '') AS scenario_name,
-                    COALESCE(MAX(try_cast(t4dataset_name AS VARCHAR)), '') AS t4dataset_name
-                FROM {comp_flat}
-                WHERE source = 'GT' AND uuid IS NOT NULL AND frame_index IS NOT NULL
-                    AND {filter_clause_comp_p5}
-                GROUP BY 1,2,3
+            marker_line_width=1.5,
+            marker_line_color="rgba(255,255,255,0.45)",
+            root_color="rgba(240,240,245,0.95)",
+        )
+        _title_layout = {**PLOTLY_LAYOUT_THEME["title"], "text": title}  # reuse the themed title settings, overriding only the text
+        apply_chart_theme(
+            fig,
+            height=430,
+            margin=dict(t=20, l=2, r=2, b=2),
+            paper_bgcolor="rgba(0,0,0,0)",
+            title=_title_layout,
+        )
+        st.plotly_chart(fig, width='stretch', key=st_key)  # st_key is passed as the Streamlit element key
+    
+    
+    if not single_mode:
+        ds_dlog("section: Perception_diff_start")
+        st.divider()
+        st.markdown(
+            section_header_html(
+                "Perception diff (vs baseline A)",
+                "Per-GT-object comparison vs baseline A: degraded = was TP on A and FN on candidate; improved = was FN on A and TP on candidate. Hotspots prioritize regressions.",
             ),
-            joined AS (
+            unsafe_allow_html=True,
+        )
+        for idx in range(1, len(runs)):
+            lbl = run_labels_list[idx]
+            _pd_slot = st.empty()
+            _pd_slot.markdown(ds_spot_loading_markup(f"Perception diff · run {lbl}"), unsafe_allow_html=True)
+            try:
+                filter_clause_comp_p5 = build_filter_clause(filters_list[idx], enable_dist_h=False)
+                comp_flat = _flat_view(idx)
+                query = f"""
+                WITH base_gt AS (
+                    SELECT
+                        t4dataset_id,
+                        frame_index,
+                        uuid AS gt_uuid,
+                        COUNT(*) FILTER (WHERE status = 'TP') > 0 AS tp_base,
+                        COALESCE(MAX(try_cast(suite_name AS VARCHAR)), '') AS suite_name,
+                        COALESCE(MAX(try_cast(scenario_name AS VARCHAR)), '') AS scenario_name,
+                        COALESCE(MAX(try_cast(t4dataset_name AS VARCHAR)), '') AS t4dataset_name
+                    FROM view_eval_flat
+                    WHERE source = 'GT' AND uuid IS NOT NULL AND frame_index IS NOT NULL
+                        AND {filter_clause_base}
+                    GROUP BY 1,2,3
+                ),
+                comp_gt AS (
+                    SELECT
+                        t4dataset_id,
+                        frame_index,
+                        uuid AS gt_uuid,
+                        COUNT(*) FILTER (WHERE status = 'TP') > 0 AS tp_comp,
+                        COALESCE(MAX(try_cast(suite_name AS VARCHAR)), '') AS suite_name,
+                        COALESCE(MAX(try_cast(scenario_name AS VARCHAR)), '') AS scenario_name,
+                        COALESCE(MAX(try_cast(t4dataset_name AS VARCHAR)), '') AS t4dataset_name
+                    FROM {comp_flat}
+                    WHERE source = 'GT' AND uuid IS NOT NULL AND frame_index IS NOT NULL
+                        AND {filter_clause_comp_p5}
+                    GROUP BY 1,2,3
+                ),
+                joined AS (
+                    SELECT
+                        COALESCE(CAST(b.t4dataset_id AS VARCHAR), CAST(c.t4dataset_id AS VARCHAR)) AS t4dataset_id,
+                        COALESCE(CAST(b.frame_index AS VARCHAR), CAST(c.frame_index AS VARCHAR)) AS frame_index,
+                        COALESCE(b.gt_uuid, c.gt_uuid) AS gt_uuid,
+                        COALESCE(b.tp_base, FALSE) AS tp_base,
+                        COALESCE(c.tp_comp, FALSE) AS tp_comp,
+                        COALESCE(b.suite_name, c.suite_name, '') AS suite_name,
+                        COALESCE(b.scenario_name, c.scenario_name, '') AS scenario_name,
+                        COALESCE(b.t4dataset_name, c.t4dataset_name, '') AS t4dataset_name
+                    FROM base_gt b
+                    FULL OUTER JOIN comp_gt c
+                        ON b.t4dataset_id = c.t4dataset_id
+                       AND b.frame_index = c.frame_index
+                       AND b.gt_uuid = c.gt_uuid
+                )
                 SELECT
-                    COALESCE(CAST(b.t4dataset_id AS VARCHAR), CAST(c.t4dataset_id AS VARCHAR)) AS t4dataset_id,
-                    COALESCE(CAST(b.frame_index AS VARCHAR), CAST(c.frame_index AS VARCHAR)) AS frame_index,
-                    COALESCE(b.gt_uuid, c.gt_uuid) AS gt_uuid,
-                    COALESCE(b.tp_base, FALSE) AS tp_base,
-                    COALESCE(c.tp_comp, FALSE) AS tp_comp,
-                    COALESCE(b.suite_name, c.suite_name, '') AS suite_name,
-                    COALESCE(b.scenario_name, c.scenario_name, '') AS scenario_name,
-                    COALESCE(b.t4dataset_name, c.t4dataset_name, '') AS t4dataset_name
-                FROM base_gt b
-                FULL OUTER JOIN comp_gt c
-                    ON b.t4dataset_id = c.t4dataset_id
-                   AND b.frame_index = c.frame_index
-                   AND b.gt_uuid = c.gt_uuid
-            )
-            SELECT
-                t4dataset_id,
-                CAST(COUNT(*) FILTER (WHERE TRUE) AS DOUBLE) AS total_gt,
-                CAST(COUNT(*) FILTER (WHERE NOT tp_base AND tp_comp) AS DOUBLE) AS improved_cnt,
-                CAST(COUNT(*) FILTER (WHERE tp_base AND NOT tp_comp) AS DOUBLE) AS degraded_cnt,
-                CAST(COUNT(*) FILTER (WHERE tp_base AND tp_comp) AS DOUBLE) AS both_tp_cnt,
-                CAST(COUNT(*) FILTER (WHERE NOT tp_base AND NOT tp_comp) AS DOUBLE) AS both_fn_cnt,
-                CAST(SUM((CASE WHEN tp_comp THEN 1 ELSE 0 END) - (CASE WHEN tp_base THEN 1 ELSE 0 END)) AS DOUBLE) AS net_tp_delta,
-                suite_name,
-                scenario_name,
-                t4dataset_name
-            FROM joined
-            GROUP BY t4dataset_id, suite_name, scenario_name, t4dataset_name
-            ORDER BY net_tp_delta DESC
-            """
-            df_improved = con.execute(query).df()
-            if not df_improved.empty:
-                query_frame_p5 = f"""
-                        WITH base_gt AS (
-                            SELECT
-                                t4dataset_id,
-                                frame_index,
-                                uuid AS gt_uuid,
-                                COUNT(*) FILTER (WHERE status = 'TP') > 0 AS tp_base,
-                                COALESCE(MAX(try_cast(suite_name AS VARCHAR)), '') AS suite_name,
-                                COALESCE(MAX(try_cast(scenario_name AS VARCHAR)), '') AS scenario_name,
-                                COALESCE(MAX(try_cast(t4dataset_name AS VARCHAR)), '') AS t4dataset_name
-                            FROM view_eval_flat
-                            WHERE source = 'GT' AND uuid IS NOT NULL AND frame_index IS NOT NULL
-                                AND {filter_clause_base}
-                            GROUP BY 1, 2, 3
-                        ),
-                        comp_gt AS (
+                    t4dataset_id,
+                    CAST(COUNT(*) FILTER (WHERE TRUE) AS DOUBLE) AS total_gt,
+                    CAST(COUNT(*) FILTER (WHERE NOT tp_base AND tp_comp) AS DOUBLE) AS improved_cnt,
+                    CAST(COUNT(*) FILTER (WHERE tp_base AND NOT tp_comp) AS DOUBLE) AS degraded_cnt,
+                    CAST(COUNT(*) FILTER (WHERE tp_base AND tp_comp) AS DOUBLE) AS both_tp_cnt,
+                    CAST(COUNT(*) FILTER (WHERE NOT tp_base AND NOT tp_comp) AS DOUBLE) AS both_fn_cnt,
+                    CAST(SUM((CASE WHEN tp_comp THEN 1 ELSE 0 END) - (CASE WHEN tp_base THEN 1 ELSE 0 END)) AS DOUBLE) AS net_tp_delta,
+                    suite_name,
+                    scenario_name,
+                    t4dataset_name
+                FROM joined
+                GROUP BY t4dataset_id, suite_name, scenario_name, t4dataset_name
+                ORDER BY net_tp_delta DESC
+                """
+                df_improved = con.execute(query).df()
+                if not df_improved.empty:
+                    query_frame_p5 = f"""
+                            WITH base_gt AS (
+                                SELECT
+                                    t4dataset_id,
+                                    frame_index,
+                                    uuid AS gt_uuid,
+                                    COUNT(*) FILTER (WHERE status = 'TP') > 0 AS tp_base,
+                                    COALESCE(MAX(try_cast(suite_name AS VARCHAR)), '') AS suite_name,
+                                    COALESCE(MAX(try_cast(scenario_name AS VARCHAR)), '') AS scenario_name,
+                                    COALESCE(MAX(try_cast(t4dataset_name AS VARCHAR)), '') AS t4dataset_name
+                                FROM view_eval_flat
+                                WHERE source = 'GT' AND uuid IS NOT NULL AND frame_index IS NOT NULL
+                                    AND {filter_clause_base}
+                                GROUP BY 1, 2, 3
+                            ),
+                            comp_gt AS (
+                                SELECT
+                                    t4dataset_id,
+                                    frame_index,
+                                    uuid AS gt_uuid,
+                                    COUNT(*) FILTER (WHERE status = 'TP') > 0 AS tp_comp,
+                                    COALESCE(MAX(try_cast(suite_name AS VARCHAR)), '') AS suite_name,
+                                    COALESCE(MAX(try_cast(scenario_name AS VARCHAR)), '') AS scenario_name,
+                                    COALESCE(MAX(try_cast(t4dataset_name AS VARCHAR)), '') AS t4dataset_name
+                                FROM {comp_flat}
+                                WHERE source = 'GT' AND uuid IS NOT NULL AND frame_index IS NOT NULL
+                                    AND {filter_clause_comp_p5}
+                                GROUP BY 1, 2, 3
+                            ),
+                            joined AS (
+                                SELECT
+                                    COALESCE(CAST(b.t4dataset_id AS VARCHAR), CAST(c.t4dataset_id AS VARCHAR)) AS t4dataset_id,
+                                    COALESCE(CAST(b.frame_index AS VARCHAR), CAST(c.frame_index AS VARCHAR)) AS frame_index,
+                                    COALESCE(b.gt_uuid, c.gt_uuid) AS gt_uuid,
+                                    COALESCE(b.tp_base, FALSE) AS tp_base,
+                                    COALESCE(c.tp_comp, FALSE) AS tp_comp,
+                                    COALESCE(b.suite_name, c.suite_name, '') AS suite_name,
+                                    COALESCE(b.scenario_name, c.scenario_name, '') AS scenario_name,
+                                    COALESCE(b.t4dataset_name, c.t4dataset_name, '') AS t4dataset_name
+                                FROM base_gt b
+                                FULL OUTER JOIN comp_gt c
+                                    ON b.t4dataset_id = c.t4dataset_id
+                                   AND b.frame_index = c.frame_index
+                                   AND b.gt_uuid = c.gt_uuid
+                            )
                             SELECT
                                 t4dataset_id,
                                 frame_index,
-                                uuid AS gt_uuid,
-                                COUNT(*) FILTER (WHERE status = 'TP') > 0 AS tp_comp,
-                                COALESCE(MAX(try_cast(suite_name AS VARCHAR)), '') AS suite_name,
-                                COALESCE(MAX(try_cast(scenario_name AS VARCHAR)), '') AS scenario_name,
-                                COALESCE(MAX(try_cast(t4dataset_name AS VARCHAR)), '') AS t4dataset_name
-                            FROM {comp_flat}
-                            WHERE source = 'GT' AND uuid IS NOT NULL AND frame_index IS NOT NULL
-                                AND {filter_clause_comp_p5}
-                            GROUP BY 1, 2, 3
-                        ),
-                        joined AS (
+                                scenario_name,
+                                suite_name,
+                                t4dataset_name,
+                                CAST(COUNT(*) FILTER (WHERE TRUE) AS DOUBLE) AS total_gt,
+                                CAST(COUNT(*) FILTER (WHERE NOT tp_base AND tp_comp) AS DOUBLE) AS improved_cnt,
+                                CAST(COUNT(*) FILTER (WHERE tp_base AND NOT tp_comp) AS DOUBLE) AS degraded_cnt,
+                                CAST(COUNT(*) FILTER (WHERE tp_base AND tp_comp) AS DOUBLE) AS both_tp_cnt,
+                                CAST(COUNT(*) FILTER (WHERE NOT tp_base AND NOT tp_comp) AS DOUBLE) AS both_fn_cnt,
+                                CAST(SUM((CASE WHEN tp_comp THEN 1 ELSE 0 END) - (CASE WHEN tp_base THEN 1 ELSE 0 END)) AS DOUBLE) AS net_tp_delta
+                            FROM joined
+                            GROUP BY t4dataset_id, frame_index, suite_name, scenario_name, t4dataset_name
+                            ORDER BY net_tp_delta DESC
+                            """
+                    query_object_p5 = f"""
+                            WITH base_gt AS (
+                                SELECT
+                                    t4dataset_id,
+                                    frame_index,
+                                    uuid AS gt_uuid,
+                                    COUNT(*) FILTER (WHERE status = 'TP') > 0 AS tp_base,
+                                    COALESCE(MAX(try_cast(suite_name AS VARCHAR)), '') AS suite_name,
+                                    COALESCE(MAX(try_cast(scenario_name AS VARCHAR)), '') AS scenario_name,
+                                    COALESCE(MAX(try_cast(t4dataset_name AS VARCHAR)), '') AS t4dataset_name
+                                FROM view_eval_flat
+                                WHERE source = 'GT' AND uuid IS NOT NULL AND frame_index IS NOT NULL
+                                    AND {filter_clause_base}
+                                GROUP BY 1, 2, 3
+                            ),
+                            comp_gt AS (
+                                SELECT
+                                    t4dataset_id,
+                                    frame_index,
+                                    uuid AS gt_uuid,
+                                    COUNT(*) FILTER (WHERE status = 'TP') > 0 AS tp_comp,
+                                    COALESCE(MAX(try_cast(suite_name AS VARCHAR)), '') AS suite_name,
+                                    COALESCE(MAX(try_cast(scenario_name AS VARCHAR)), '') AS scenario_name,
+                                    COALESCE(MAX(try_cast(t4dataset_name AS VARCHAR)), '') AS t4dataset_name
+                                FROM {comp_flat}
+                                WHERE source = 'GT' AND uuid IS NOT NULL AND frame_index IS NOT NULL
+                                    AND {filter_clause_comp_p5}
+                                GROUP BY 1, 2, 3
+                            ),
+                            joined AS (
+                                SELECT
+                                    COALESCE(CAST(b.t4dataset_id AS VARCHAR), CAST(c.t4dataset_id AS VARCHAR)) AS t4dataset_id,
+                                    COALESCE(CAST(b.frame_index AS VARCHAR), CAST(c.frame_index AS VARCHAR)) AS frame_index,
+                                    COALESCE(b.gt_uuid, c.gt_uuid) AS gt_uuid,
+                                    COALESCE(b.tp_base, FALSE) AS tp_base,
+                                    COALESCE(c.tp_comp, FALSE) AS tp_comp,
+                                    COALESCE(b.suite_name, c.suite_name, '') AS suite_name,
+                                    COALESCE(b.scenario_name, c.scenario_name, '') AS scenario_name,
+                                    COALESCE(b.t4dataset_name, c.t4dataset_name, '') AS t4dataset_name
+                                FROM base_gt b
+                                FULL OUTER JOIN comp_gt c
+                                    ON b.t4dataset_id = c.t4dataset_id
+                                   AND b.frame_index = c.frame_index
+                                   AND b.gt_uuid = c.gt_uuid
+                            ),
+                            obj_attrs AS (
+                                SELECT
+                                    t4dataset_id,
+                                    frame_index,
+                                    uuid,
+                                    MAX(CAST(label AS VARCHAR)) AS label,
+                                    MAX(dist_h) AS dist_h
+                                FROM view_eval_flat
+                                WHERE source = 'GT'
+                                GROUP BY 1, 2, 3
+                            )
                             SELECT
-                                COALESCE(CAST(b.t4dataset_id AS VARCHAR), CAST(c.t4dataset_id AS VARCHAR)) AS t4dataset_id,
-                                COALESCE(CAST(b.frame_index AS VARCHAR), CAST(c.frame_index AS VARCHAR)) AS frame_index,
-                                COALESCE(b.gt_uuid, c.gt_uuid) AS gt_uuid,
-                                COALESCE(b.tp_base, FALSE) AS tp_base,
-                                COALESCE(c.tp_comp, FALSE) AS tp_comp,
-                                COALESCE(b.suite_name, c.suite_name, '') AS suite_name,
-                                COALESCE(b.scenario_name, c.scenario_name, '') AS scenario_name,
-                                COALESCE(b.t4dataset_name, c.t4dataset_name, '') AS t4dataset_name
-                            FROM base_gt b
-                            FULL OUTER JOIN comp_gt c
-                                ON b.t4dataset_id = c.t4dataset_id
-                               AND b.frame_index = c.frame_index
-                               AND b.gt_uuid = c.gt_uuid
+                                j.t4dataset_id,
+                                j.frame_index,
+                                j.gt_uuid,
+                                COALESCE(e.label, '') AS label,
+                                COALESCE(e.dist_h, 0.0) AS dist_h,
+                                {_DIST_BIN_CASE.replace("dist_h", "COALESCE(e.dist_h, 0.0)")} AS distance_bin,
+                                j.suite_name,
+                                j.scenario_name,
+                                j.t4dataset_name,
+                                CASE
+                                    WHEN NOT j.tp_base AND j.tp_comp THEN 'improved'
+                                    WHEN j.tp_base AND NOT j.tp_comp THEN 'degraded'
+                                    WHEN j.tp_base AND j.tp_comp THEN 'both_tp'
+                                    ELSE 'both_fn'
+                                END AS change_type,
+                                j.tp_base,
+                                j.tp_comp
+                            FROM joined j
+                            LEFT JOIN obj_attrs e
+                                ON CAST(j.t4dataset_id AS VARCHAR) = CAST(e.t4dataset_id AS VARCHAR)
+                               AND j.frame_index = CAST(e.frame_index AS VARCHAR)
+                               AND j.gt_uuid = e.uuid
+                            ORDER BY change_type, j.t4dataset_id, j.frame_index
+                            """
+                    try:
+                        df_by_frame = con.execute(query_frame_p5).df()
+                    except Exception:
+                        df_by_frame = pd.DataFrame()
+                    try:
+                        df_by_object_full = con.execute(query_object_p5).df()
+                    except Exception:
+                        df_by_object_full = pd.DataFrame()
+    
+                    tot_imp = float(df_improved["improved_cnt"].sum())
+                    tot_deg = float(df_improved["degraded_cnt"].sum())
+                    tot_net = tot_imp - tot_deg
+                    net_s = f"+{int(tot_net)}" if tot_net > 0 else str(int(tot_net))
+    
+                    with st.expander(f"Run {lbl} vs A", expanded=(len(runs) == 2)):
+                        c1, c2, c3, c4 = st.columns(4)
+                        c1.metric("Improved (FN→TP)", int(tot_imp))
+                        c2.metric("Degraded (TP→FN)", int(tot_deg))
+                        c3.metric("Net TP delta", net_s)
+                        c4.caption("Start with scenarios and frames with the most **degraded** counts.")
+                        st.markdown(
+                            f"**Summary:** Net **{net_s}** TP vs baseline A — "
+                            f"**{int(tot_deg)}** degraded vs **{int(tot_imp)}** improved."
                         )
-                        SELECT
-                            t4dataset_id,
-                            frame_index,
-                            scenario_name,
-                            suite_name,
-                            t4dataset_name,
-                            CAST(COUNT(*) FILTER (WHERE TRUE) AS DOUBLE) AS total_gt,
-                            CAST(COUNT(*) FILTER (WHERE NOT tp_base AND tp_comp) AS DOUBLE) AS improved_cnt,
-                            CAST(COUNT(*) FILTER (WHERE tp_base AND NOT tp_comp) AS DOUBLE) AS degraded_cnt,
-                            CAST(COUNT(*) FILTER (WHERE tp_base AND tp_comp) AS DOUBLE) AS both_tp_cnt,
-                            CAST(COUNT(*) FILTER (WHERE NOT tp_base AND NOT tp_comp) AS DOUBLE) AS both_fn_cnt,
-                            CAST(SUM((CASE WHEN tp_comp THEN 1 ELSE 0 END) - (CASE WHEN tp_base THEN 1 ELSE 0 END)) AS DOUBLE) AS net_tp_delta
-                        FROM joined
-                        GROUP BY t4dataset_id, frame_index, suite_name, scenario_name, t4dataset_name
-                        ORDER BY net_tp_delta DESC
-                        """
-                query_object_p5 = f"""
+    
+                        b_key = f"p5_baobab_{lbl}_{idx}"
+                        c1b, c2b, c3b = st.columns([1, 1, 1])
+                        with c1b:
+                            baobab_viz = st.radio(
+                                "Chart type",
+                                ["Sunburst", "Treemap"],
+                                horizontal=True,
+                                key=f"{b_key}_viz",
+                            )
+                        with c2b:
+                            baobab_ns = st.slider(
+                                "Max scenarios",
+                                min_value=5,
+                                max_value=25,
+                                value=15,
+                                key=f"{b_key}_ns",
+                            )
+                        with c3b:
+                            baobab_nf = st.slider(
+                                "Max frames / scenario",
+                                min_value=5,
+                                max_value=20,
+                                value=10,
+                                key=f"{b_key}_nf",
+                            )
+                        if df_by_object_full.empty:
+                            st.caption("No object-level rows for hierarchy.")
+                        else:
+                            path_cols = ["root", "scen_g", "fr_display", "label"]
+                            h_imp = _baobab_hierarchy_from_objects(
+                                df_by_object_full,
+                                "improved",
+                                f"Improved ({lbl} vs A)",
+                                baobab_ns,
+                                baobab_nf,
+                            )
+                            h_deg = _baobab_hierarchy_from_objects(
+                                df_by_object_full,
+                                "degraded",
+                                f"Degraded ({lbl} vs A)",
+                                baobab_ns,
+                                baobab_nf,
+                            )
+                            pair_both = (not h_imp.empty) and (not h_deg.empty)
+                            plot_entries = []
+                            for ct, hdf, cmap in (
+                                ("improved", h_imp, IMPROVED_SCALE),
+                                ("degraded", h_deg, DEGRADED_SCALE),
+                            ):
+                                if hdf.empty:
+                                    plot_entries.append((ct, None))
+                                    continue
+                                title = f"{baobab_viz}: {ct} (n = {int(hdf['n'].sum())} GT objects)"
+                                if baobab_viz == "Sunburst":
+                                    fig_b = px.sunburst(
+                                        hdf,
+                                        path=path_cols,
+                                        values="n",
+                                        color="n",
+                                        color_continuous_scale=cmap,
+                                        title=title,
+                                    )
+                                    h_sb = 480 if pair_both else 620
+                                    apply_chart_theme(fig_b, height=h_sb, margin=dict(t=36, l=4, r=4, b=4))
+                                else:
+                                    fig_b = px.treemap(
+                                        hdf,
+                                        path=path_cols,
+                                        values="n",
+                                        color="n",
+                                        color_continuous_scale=cmap,
+                                        title=title,
+                                    )
+                                    h_tr = 440 if pair_both else 520
+                                    apply_chart_theme(fig_b, height=h_tr, margin=dict(t=40, l=4, r=4, b=4))
+                                plot_entries.append((ct, fig_b))
+    
+                            two_up = (
+                                len(plot_entries) == 2
+                                and plot_entries[0][1] is not None
+                                and plot_entries[1][1] is not None
+                            )
+                            if two_up:
+                                bc1, bc2 = st.columns(2, gap="small")
+                                with bc1:
+                                    st.plotly_chart(
+                                        plot_entries[0][1],
+                                        width='stretch',
+                                        key=f"{b_key}_fig_{plot_entries[0][0]}",
+                                    )
+                                with bc2:
+                                    st.plotly_chart(
+                                        plot_entries[1][1],
+                                        width='stretch',
+                                        key=f"{b_key}_fig_{plot_entries[1][0]}",
+                                    )
+                            else:
+                                for ct, fig_b in plot_entries:
+                                    if fig_b is not None:
+                                        st.plotly_chart(
+                                            fig_b,
+                                            width='stretch',
+                                            key=f"{b_key}_fig_{ct}",
+                                        )
+                                    else:
+                                        st.caption(f"No **{ct}** objects to chart.")
+    
+                        # --- Comparison lens: label / scenario / frame (treemap trio, Baobab-aligned) ---
+                        query_label = f"""
                         WITH base_gt AS (
                             SELECT
                                 t4dataset_id,
                                 frame_index,
                                 uuid AS gt_uuid,
-                                COUNT(*) FILTER (WHERE status = 'TP') > 0 AS tp_base,
-                                COALESCE(MAX(try_cast(suite_name AS VARCHAR)), '') AS suite_name,
-                                COALESCE(MAX(try_cast(scenario_name AS VARCHAR)), '') AS scenario_name,
-                                COALESCE(MAX(try_cast(t4dataset_name AS VARCHAR)), '') AS t4dataset_name
+                                COALESCE(MAX(try_cast(label AS VARCHAR)), '') AS label,
+                                COUNT(*) FILTER (WHERE status = 'TP') > 0 AS tp_base
                             FROM view_eval_flat
                             WHERE source = 'GT' AND uuid IS NOT NULL AND frame_index IS NOT NULL
                                 AND {filter_clause_base}
@@ -1984,10 +2424,8 @@ def _plot_comparison_lens_treemap(
                                 t4dataset_id,
                                 frame_index,
                                 uuid AS gt_uuid,
-                                COUNT(*) FILTER (WHERE status = 'TP') > 0 AS tp_comp,
-                                COALESCE(MAX(try_cast(suite_name AS VARCHAR)), '') AS suite_name,
-                                COALESCE(MAX(try_cast(scenario_name AS VARCHAR)), '') AS scenario_name,
-                                COALESCE(MAX(try_cast(t4dataset_name AS VARCHAR)), '') AS t4dataset_name
+                                COALESCE(MAX(try_cast(label AS VARCHAR)), '') AS label,
+                                COUNT(*) FILTER (WHERE status = 'TP') > 0 AS tp_comp
                             FROM {comp_flat}
                             WHERE source = 'GT' AND uuid IS NOT NULL AND frame_index IS NOT NULL
                                 AND {filter_clause_comp_p5}
@@ -1995,853 +2433,659 @@ def _plot_comparison_lens_treemap(
                         ),
                         joined AS (
                             SELECT
-                                COALESCE(CAST(b.t4dataset_id AS VARCHAR), CAST(c.t4dataset_id AS VARCHAR)) AS t4dataset_id,
-                                COALESCE(CAST(b.frame_index AS VARCHAR), CAST(c.frame_index AS VARCHAR)) AS frame_index,
-                                COALESCE(b.gt_uuid, c.gt_uuid) AS gt_uuid,
+                                COALESCE(b.label, c.label) AS label,
                                 COALESCE(b.tp_base, FALSE) AS tp_base,
-                                COALESCE(c.tp_comp, FALSE) AS tp_comp,
-                                COALESCE(b.suite_name, c.suite_name, '') AS suite_name,
-                                COALESCE(b.scenario_name, c.scenario_name, '') AS scenario_name,
-                                COALESCE(b.t4dataset_name, c.t4dataset_name, '') AS t4dataset_name
+                                COALESCE(c.tp_comp, FALSE) AS tp_comp
                             FROM base_gt b
                             FULL OUTER JOIN comp_gt c
                                 ON b.t4dataset_id = c.t4dataset_id
                                AND b.frame_index = c.frame_index
                                AND b.gt_uuid = c.gt_uuid
-                        ),
-                        obj_attrs AS (
-                            SELECT
-                                t4dataset_id,
-                                frame_index,
-                                uuid,
-                                MAX(CAST(label AS VARCHAR)) AS label,
-                                MAX(dist_h) AS dist_h
-                            FROM view_eval_flat
-                            WHERE source = 'GT'
-                            GROUP BY 1, 2, 3
                         )
                         SELECT
-                            j.t4dataset_id,
-                            j.frame_index,
-                            j.gt_uuid,
-                            COALESCE(e.label, '') AS label,
-                            COALESCE(e.dist_h, 0.0) AS dist_h,
-                            {_DIST_BIN_CASE.replace("dist_h", "COALESCE(e.dist_h, 0.0)")} AS distance_bin,
-                            j.suite_name,
-                            j.scenario_name,
-                            j.t4dataset_name,
-                            CASE
-                                WHEN NOT j.tp_base AND j.tp_comp THEN 'improved'
-                                WHEN j.tp_base AND NOT j.tp_comp THEN 'degraded'
-                                WHEN j.tp_base AND j.tp_comp THEN 'both_tp'
-                                ELSE 'both_fn'
-                            END AS change_type,
-                            j.tp_base,
-                            j.tp_comp
-                        FROM joined j
-                        LEFT JOIN obj_attrs e
-                            ON CAST(j.t4dataset_id AS VARCHAR) = CAST(e.t4dataset_id AS VARCHAR)
-                           AND j.frame_index = CAST(e.frame_index AS VARCHAR)
-                           AND j.gt_uuid = e.uuid
-                        ORDER BY change_type, j.t4dataset_id, j.frame_index
+                            label,
+                            CAST(COUNT(*) FILTER (WHERE TRUE) AS DOUBLE) AS total_gt,
+                            CAST(COUNT(*) FILTER (WHERE NOT tp_base AND tp_comp) AS DOUBLE) AS improved_cnt,
+                            CAST(COUNT(*) FILTER (WHERE tp_base AND NOT tp_comp) AS DOUBLE) AS degraded_cnt,
+                            CAST(COUNT(*) FILTER (WHERE tp_base AND tp_comp) AS DOUBLE) AS both_tp_cnt,
+                            CAST(COUNT(*) FILTER (WHERE NOT tp_base AND NOT tp_comp) AS DOUBLE) AS both_fn_cnt,
+                            CAST(SUM((CASE WHEN tp_comp THEN 1 ELSE 0 END) - (CASE WHEN tp_base THEN 1 ELSE 0 END)) AS DOUBLE) AS net_tp_delta
+                        FROM joined
+                        GROUP BY label
+                        ORDER BY net_tp_delta DESC
                         """
-                try:
-                    df_by_frame = con.execute(query_frame_p5).df()
-                except Exception:
-                    df_by_frame = pd.DataFrame()
-                try:
-                    df_by_object_full = con.execute(query_object_p5).df()
-                except Exception:
-                    df_by_object_full = pd.DataFrame()
-
-                tot_imp = float(df_improved["improved_cnt"].sum())
-                tot_deg = float(df_improved["degraded_cnt"].sum())
-                tot_net = tot_imp - tot_deg
-                net_s = f"+{int(tot_net)}" if tot_net > 0 else str(int(tot_net))
-
-                with st.expander(f"Run {lbl} vs A", expanded=(len(runs) == 2)):
-                    c1, c2, c3, c4 = st.columns(4)
-                    c1.metric("Improved (FN→TP)", int(tot_imp))
-                    c2.metric("Degraded (TP→FN)", int(tot_deg))
-                    c3.metric("Net TP delta", net_s)
-                    c4.caption("Start with scenarios and frames with the most **degraded** counts.")
-                    st.markdown(
-                        f"**Summary:** Net **{net_s}** TP vs baseline A — "
-                        f"**{int(tot_deg)}** degraded vs **{int(tot_imp)}** improved."
-                    )
-
-                    b_key = f"p5_baobab_{lbl}_{idx}"
-                    c1b, c2b, c3b = st.columns([1, 1, 1])
-                    with c1b:
-                        baobab_viz = st.radio(
-                            "Chart type",
-                            ["Sunburst", "Treemap"],
-                            horizontal=True,
-                            key=f"{b_key}_viz",
-                        )
-                    with c2b:
-                        baobab_ns = st.slider(
-                            "Max scenarios",
-                            min_value=5,
-                            max_value=25,
-                            value=15,
-                            key=f"{b_key}_ns",
-                        )
-                    with c3b:
-                        baobab_nf = st.slider(
-                            "Max frames / scenario",
-                            min_value=5,
-                            max_value=20,
-                            value=10,
-                            key=f"{b_key}_nf",
-                        )
-                    if df_by_object_full.empty:
-                        st.caption("No object-level rows for hierarchy.")
-                    else:
-                        path_cols = ["root", "scen_g", "fr_display", "label"]
-                        h_imp = _baobab_hierarchy_from_objects(
-                            df_by_object_full,
-                            "improved",
-                            f"Improved ({lbl} vs A)",
-                            baobab_ns,
-                            baobab_nf,
-                        )
-                        h_deg = _baobab_hierarchy_from_objects(
-                            df_by_object_full,
-                            "degraded",
-                            f"Degraded ({lbl} vs A)",
-                            baobab_ns,
-                            baobab_nf,
-                        )
-                        pair_both = (not h_imp.empty) and (not h_deg.empty)
-                        plot_entries = []
-                        for ct, hdf, cmap in (
-                            ("improved", h_imp, IMPROVED_SCALE),
-                            ("degraded", h_deg, DEGRADED_SCALE),
-                        ):
-                            if hdf.empty:
-                                plot_entries.append((ct, None))
-                                continue
-                            title = f"{baobab_viz}: {ct} (n = {int(hdf['n'].sum())} GT objects)"
-                            if baobab_viz == "Sunburst":
-                                fig_b = px.sunburst(
-                                    hdf,
-                                    path=path_cols,
-                                    values="n",
-                                    color="n",
-                                    color_continuous_scale=cmap,
-                                    title=title,
+                        df_by_label = pd.DataFrame()
+                        try:
+                            df_by_label = con.execute(query_label).df()
+                        except Exception as e_label:
+                            st.caption(f"Label query: {e_label}")
+    
+                        scen_agg = pd.DataFrame()
+                        if not df_improved.empty:
+                            scen_agg = (
+                                df_improved.groupby("scenario_name", dropna=False)
+                                .agg(
+                                    improved_cnt=("improved_cnt", "sum"),
+                                    degraded_cnt=("degraded_cnt", "sum"),
+                                )
+                                .reset_index()
+                            )
+                            scen_agg = scen_agg.sort_values(
+                                by=["degraded_cnt", "improved_cnt"],
+                                ascending=[False, True],
+                            )
+    
+                        df_frame_sorted = pd.DataFrame()
+                        if not df_by_frame.empty:
+                            df_frame_sorted = df_by_frame.sort_values(
+                                by=["degraded_cnt", "improved_cnt"],
+                                ascending=[False, True],
+                            ).reset_index(drop=True)
+    
+                        root_lens = f"{lbl} vs A"
+                        lc1, lc2, lc3 = st.columns(3, gap="small")
+                        with lc1:
+                            if not df_by_label.empty:
+                                tdf_l = _comparison_lens_treemap_df(
+                                    df_by_label["label"],
+                                    df_by_label["improved_cnt"],
+                                    df_by_label["degraded_cnt"],
+                                    root_lens,
+                                )
+                                _plot_comparison_lens_treemap(
+                                    tdf_l,
+                                    f"p5_lens_lab_{lbl}_{idx}",
+                                    "By class",
                                 )
-                                h_sb = 480 if pair_both else 620
-                                apply_chart_theme(fig_b, height=h_sb, margin=dict(t=36, l=4, r=4, b=4))
                             else:
-                                fig_b = px.treemap(
-                                    hdf,
-                                    path=path_cols,
-                                    values="n",
-                                    color="n",
-                                    color_continuous_scale=cmap,
-                                    title=title,
+                                st.caption("_No label data._")
+                        with lc2:
+                            if not scen_agg.empty:
+                                tdf_s = _comparison_lens_treemap_df(
+                                    scen_agg["scenario_name"].astype(str),
+                                    scen_agg["improved_cnt"],
+                                    scen_agg["degraded_cnt"],
+                                    root_lens,
                                 )
-                                h_tr = 440 if pair_both else 520
-                                apply_chart_theme(fig_b, height=h_tr, margin=dict(t=40, l=4, r=4, b=4))
-                            plot_entries.append((ct, fig_b))
-
-                        two_up = (
-                            len(plot_entries) == 2
-                            and plot_entries[0][1] is not None
-                            and plot_entries[1][1] is not None
-                        )
-                        if two_up:
-                            bc1, bc2 = st.columns(2, gap="small")
-                            with bc1:
-                                st.plotly_chart(
-                                    plot_entries[0][1],
+                                _plot_comparison_lens_treemap(
+                                    tdf_s,
+                                    f"p5_lens_scen_{lbl}_{idx}",
+                                    "By scenario",
+                                )
+                            else:
+                                st.caption("_No scenario data._")
+                        with lc3:
+                            if not df_frame_sorted.empty:
+                                fr_cap = 36
+                                fr_top = df_frame_sorted.head(fr_cap).copy()
+                                nms = (
+                                    fr_top["scenario_name"].astype(str).str.slice(0, 26)
+                                    + "\n· f"
+                                    + fr_top["frame_index"].astype(str)
+                                ).tolist()
+                                ims = fr_top["improved_cnt"].astype(float).tolist()
+                                dgs = fr_top["degraded_cnt"].astype(float).tolist()
+                                rest = df_frame_sorted.iloc[fr_cap:]
+                                if not rest.empty:
+                                    io = float(rest["improved_cnt"].sum())
+                                    do = float(rest["degraded_cnt"].sum())
+                                    if io > 0 or do > 0:
+                                        nms.append(
+                                            f"Other frames\n({len(rest)} frames)"
+                                        )
+                                        ims.append(io)
+                                        dgs.append(do)
+                                tdf_f = _comparison_lens_treemap_df(
+                                    pd.Series(nms),
+                                    pd.Series(ims),
+                                    pd.Series(dgs),
+                                    root_lens,
+                                )
+                                _plot_comparison_lens_treemap(
+                                    tdf_f,
+                                    f"p5_lens_fr_{lbl}_{idx}",
+                                    "By frame",
+                                )
+                                st.caption(
+                                    f"Top **{fr_cap}** frames by degraded, plus **Other frames** "
+                                    f"so totals match **By class** / **By scenario**."
+                                )
+                            else:
+                                st.caption("_No frame data._")
+    
+                        with st.expander("Tables behind the lens (label / scenario / frame)"):
+                            if not df_by_label.empty:
+                                st.markdown("**Per label**")
+                                st.dataframe(
+                                    df_by_label,
                                     width='stretch',
-                                    key=f"{b_key}_fig_{plot_entries[0][0]}",
+                                    hide_index=True,
                                 )
-                            with bc2:
-                                st.plotly_chart(
-                                    plot_entries[1][1],
+                            if not scen_agg.empty:
+                                st.markdown("**Per scenario**")
+                                st.dataframe(scen_agg, width='stretch', hide_index=True)
+                            if not df_frame_sorted.empty:
+                                st.markdown("**Per frame** (sorted by degraded)")
+                                st.dataframe(
+                                    df_frame_sorted.head(200),
                                     width='stretch',
-                                    key=f"{b_key}_fig_{plot_entries[1][0]}",
+                                    hide_index=True,
                                 )
-                        else:
-                            for ct, fig_b in plot_entries:
-                                if fig_b is not None:
-                                    st.plotly_chart(
-                                        fig_b,
-                                        width='stretch',
-                                        key=f"{b_key}_fig_{ct}",
-                                    )
-                                else:
-                                    st.caption(f"No **{ct}** objects to chart.")
-
-                    # --- Comparison lens: label / scenario / frame (treemap trio, Baobab-aligned) ---
-                    query_label = f"""
-                    WITH base_gt AS (
-                        SELECT
-                            t4dataset_id,
-                            frame_index,
-                            uuid AS gt_uuid,
-                            COALESCE(MAX(try_cast(label AS VARCHAR)), '') AS label,
-                            COUNT(*) FILTER (WHERE status = 'TP') > 0 AS tp_base
-                        FROM view_eval_flat
-                        WHERE source = 'GT' AND uuid IS NOT NULL AND frame_index IS NOT NULL
-                            AND {filter_clause_base}
-                        GROUP BY 1, 2, 3
-                    ),
-                    comp_gt AS (
-                        SELECT
-                            t4dataset_id,
-                            frame_index,
-                            uuid AS gt_uuid,
-                            COALESCE(MAX(try_cast(label AS VARCHAR)), '') AS label,
-                            COUNT(*) FILTER (WHERE status = 'TP') > 0 AS tp_comp
-                        FROM {comp_flat}
-                        WHERE source = 'GT' AND uuid IS NOT NULL AND frame_index IS NOT NULL
-                            AND {filter_clause_comp_p5}
-                        GROUP BY 1, 2, 3
-                    ),
-                    joined AS (
-                        SELECT
-                            COALESCE(b.label, c.label) AS label,
-                            COALESCE(b.tp_base, FALSE) AS tp_base,
-                            COALESCE(c.tp_comp, FALSE) AS tp_comp
-                        FROM base_gt b
-                        FULL OUTER JOIN comp_gt c
-                            ON b.t4dataset_id = c.t4dataset_id
-                           AND b.frame_index = c.frame_index
-                           AND b.gt_uuid = c.gt_uuid
-                    )
-                    SELECT
-                        label,
-                        CAST(COUNT(*) FILTER (WHERE TRUE) AS DOUBLE) AS total_gt,
-                        CAST(COUNT(*) FILTER (WHERE NOT tp_base AND tp_comp) AS DOUBLE) AS improved_cnt,
-                        CAST(COUNT(*) FILTER (WHERE tp_base AND NOT tp_comp) AS DOUBLE) AS degraded_cnt,
-                        CAST(COUNT(*) FILTER (WHERE tp_base AND tp_comp) AS DOUBLE) AS both_tp_cnt,
-                        CAST(COUNT(*) FILTER (WHERE NOT tp_base AND NOT tp_comp) AS DOUBLE) AS both_fn_cnt,
-                        CAST(SUM((CASE WHEN tp_comp THEN 1 ELSE 0 END) - (CASE WHEN tp_base THEN 1 ELSE 0 END)) AS DOUBLE) AS net_tp_delta
-                    FROM joined
-                    GROUP BY label
-                    ORDER BY net_tp_delta DESC
-                    """
-                    df_by_label = pd.DataFrame()
-                    try:
-                        df_by_label = con.execute(query_label).df()
-                    except Exception as e_label:
-                        st.caption(f"Label query: {e_label}")
-
-                    scen_agg = pd.DataFrame()
-                    if not df_improved.empty:
-                        scen_agg = (
-                            df_improved.groupby("scenario_name", dropna=False)
-                            .agg(
-                                improved_cnt=("improved_cnt", "sum"),
-                                degraded_cnt=("degraded_cnt", "sum"),
+
+                        with st.expander("Full dataset breakdown (per t4dataset_id row)"):
+                            st.dataframe(df_improved, width='stretch', hide_index=True)
+
+                        # --- Drill-down: filters + objects ---
+                        with st.expander("Drill-down: objects"):
+                            scen_key = f"p5_scen_{lbl}_{idx}"
+                            t4_key = f"p5_t4_{lbl}_{idx}"
+                            lab_key = f"p5_lab_{lbl}_{idx}"
+                            for k, default in ((scen_key, []), (t4_key, []), (lab_key, [])):
+                                if k not in st.session_state:
+                                    st.session_state[k] = default
+
+                            scenarios_all = sorted(
+                                df_improved["scenario_name"].dropna().astype(str).unique().tolist()
                             )
-                            .reset_index()
-                        )
-                        scen_agg = scen_agg.sort_values(
-                            by=["degraded_cnt", "improved_cnt"],
-                            ascending=[False, True],
-                        )
-
-                    df_frame_sorted = pd.DataFrame()
-                    if not df_by_frame.empty:
-                        df_frame_sorted = df_by_frame.sort_values(
-                            by=["degraded_cnt", "improved_cnt"],
-                            ascending=[False, True],
-                        ).reset_index(drop=True)
-
-                    root_lens = f"{lbl} vs A"
-                    lc1, lc2, lc3 = st.columns(3, gap="small")
-                    with lc1:
-                        if not df_by_label.empty:
-                            tdf_l = _comparison_lens_treemap_df(
-                                df_by_label["label"],
-                                df_by_label["improved_cnt"],
-                                df_by_label["degraded_cnt"],
-                                root_lens,
+                            t4_all = sorted(
+                                df_improved["t4dataset_name"].dropna().astype(str).unique().tolist()
                             )
-                            _plot_comparison_lens_treemap(
-                                tdf_l,
-                                f"p5_lens_lab_{lbl}_{idx}",
-                                "By class",
+                            labels_all = (
+                                sorted(df_by_object_full["label"].dropna().astype(str).unique().tolist())
+                                if not df_by_object_full.empty
+                                else []
                             )
-                        else:
-                            st.caption("_No label data._")
-                    with lc2:
-                        if not scen_agg.empty:
-                            tdf_s = _comparison_lens_treemap_df(
-                                scen_agg["scenario_name"].astype(str),
-                                scen_agg["improved_cnt"],
-                                scen_agg["degraded_cnt"],
-                                root_lens,
+                            # Keep prior picks valid so Streamlit does not reset widgets when options refresh
+                            scenarios_opts = sorted(
+                                set(scenarios_all) | set(st.session_state.get(scen_key, []) or [])
                             )
-                            _plot_comparison_lens_treemap(
-                                tdf_s,
-                                f"p5_lens_scen_{lbl}_{idx}",
-                                "By scenario",
+                            t4_opts = sorted(set(t4_all) | set(st.session_state.get(t4_key, []) or []))
+                            labels_opts = sorted(
+                                set(labels_all) | set(st.session_state.get(lab_key, []) or [])
                             )
-                        else:
-                            st.caption("_No scenario data._")
-                    with lc3:
-                        if not df_frame_sorted.empty:
-                            fr_cap = 36
-                            fr_top = df_frame_sorted.head(fr_cap).copy()
-                            nms = (
-                                fr_top["scenario_name"].astype(str).str.slice(0, 26)
-                                + "\n· f"
-                                + fr_top["frame_index"].astype(str)
-                            ).tolist()
-                            ims = fr_top["improved_cnt"].astype(float).tolist()
-                            dgs = fr_top["degraded_cnt"].astype(float).tolist()
-                            rest = df_frame_sorted.iloc[fr_cap:]
-                            if not rest.empty:
-                                io = float(rest["improved_cnt"].sum())
-                                do = float(rest["degraded_cnt"].sum())
-                                if io > 0 or do > 0:
-                                    nms.append(
-                                        f"Other frames\n({len(rest)} frames)"
+
+                            pr1, pr2 = st.columns(2)
+                            with pr1:
+                                if st.button(
+                                    "Preset: top 5 degraded scenarios",
+                                    key=f"p5_pre_scen_{lbl}_{idx}",
+                                ):
+                                    if not df_improved.empty:
+                                        sa = (
+                                            df_improved.groupby("scenario_name", dropna=False)[
+                                                "degraded_cnt"
+                                            ]
+                                            .sum()
+                                            .sort_values(ascending=False)
+                                            .head(5)
+                                        )
+                                        st.session_state[scen_key] = [
+                                            str(x) for x in sa.index.tolist()
+                                        ]
+                                        st.rerun()
+                            fr_multiselect_key = f"p5_frkeys_{lbl}_{idx}"
+                            if fr_multiselect_key not in st.session_state:
+                                st.session_state[fr_multiselect_key] = []
+                            frame_key_labels = {}
+                            if not df_frame_sorted.empty:
+                                for _, rw in df_frame_sorted.head(40).iterrows():
+                                    fk = f"{rw['t4dataset_id']}|{rw['frame_index']}"
+                                    # Use scenario_name (not suite_name) for frame option labels
+                                    frame_key_labels[fk] = (
+                                        f"{str(rw.get('scenario_name', ''))[:36]} | "
+                                        f"f{rw['frame_index']} | deg {int(rw['degraded_cnt'])}"
                                     )
-                                    ims.append(io)
-                                    dgs.append(do)
-                            tdf_f = _comparison_lens_treemap_df(
-                                pd.Series(nms),
-                                pd.Series(ims),
-                                pd.Series(dgs),
-                                root_lens,
-                            )
-                            _plot_comparison_lens_treemap(
-                                tdf_f,
-                                f"p5_lens_fr_{lbl}_{idx}",
-                                "By frame",
-                            )
-                            st.caption(
-                                f"Top **{fr_cap}** frames by degraded, plus **Other frames** "
-                                f"so totals match **By class** / **By scenario**."
+                            with pr2:
+                                if st.button(
+                                    "Preset: top 10 degraded frames (object filter)",
+                                    key=f"p5_pre_fr_{lbl}_{idx}",
+                                ):
+                                    if frame_key_labels:
+                                        topk = list(frame_key_labels.keys())[:10]
+                                        st.session_state[fr_multiselect_key] = topk
+                                        st.rerun()
+
+                            colf1, colf2, colf3 = st.columns(3)
+                            with colf1:
+                                if scenarios_opts:
+                                    st.multiselect(
+                                        "Filter scenario_name",
+                                        scenarios_opts,
+                                        key=scen_key,
+                                    )
+                                else:
+                                    st.caption("No scenarios.")
+                            with colf2:
+                                if t4_opts:
+                                    st.multiselect(
+                                        "Filter t4dataset_name",
+                                        t4_opts,
+                                        key=t4_key,
+                                    )
+                                else:
+                                    st.caption("No t4dataset_name.")
+                            with colf3:
+                                if labels_opts:
+                                    st.multiselect(
+                                        "Filter label",
+                                        labels_opts,
+                                        key=lab_key,
+                                    )
+                                else:
+                                    st.caption("No labels.")
+
+                            prev_fr = st.session_state.get(fr_multiselect_key) or []
+                            base_frame_keys = list(frame_key_labels.keys())
+                            for k in prev_fr:
+                                if k not in frame_key_labels:
+                                    frame_key_labels[k] = f"(selected) frame {str(k).split('|')[-1]}"
+                            frame_opts_keys = base_frame_keys + [
+                                k for k in prev_fr if k not in base_frame_keys
+                            ]
+                            if frame_opts_keys:
+                                st.multiselect(
+                                    "Limit objects to frames (optional)",
+                                    options=frame_opts_keys,
+                                    format_func=lambda k: frame_key_labels.get(k, k),
+                                    key=fr_multiselect_key,
+                                )
+
+                            change_type_filter = st.selectbox(
+                                "Change type",
+                                ["degraded", "improved", "all", "both_tp", "both_fn"],
+                                key=f"change_type_{lbl}_{idx}",
+                                help="Filter objects by TP change between runs.",
                             )
-                        else:
-                            st.caption("_No frame data._")
-
-                    with st.expander("Tables behind the lens (label / scenario / frame)"):
-                        if not df_by_label.empty:
-                            st.markdown("**Per label**")
-                            st.dataframe(
-                                df_by_label,
-                                width='stretch',
-                                hide_index=True,
+                            sort_obj = st.selectbox(
+                                "Sort objects by",
+                                [
+                                    "degraded_priority_then_dist",
+                                    "frame_then_uuid",
+                                    "label_then_dist",
+                                ],
+                                key=f"p5_sort_{lbl}_{idx}",
                             )
-                        if not scen_agg.empty:
-                            st.markdown("**Per scenario**")
-                            st.dataframe(scen_agg, width='stretch', hide_index=True)
-                        if not df_frame_sorted.empty:
-                            st.markdown("**Per frame** (sorted by degraded)")
-                            st.dataframe(
-                                df_frame_sorted.head(200),
-                                width='stretch',
-                                hide_index=True,
+
+                            df_obj_show = (
+                                df_by_object_full.copy()
+                                if not df_by_object_full.empty
+                                else pd.DataFrame()
                             )
-
-                    with st.expander("Full dataset breakdown (per t4dataset_id row)"):
-                        st.dataframe(df_improved, width='stretch', hide_index=True)
-
-                    # --- Drill-down: filters + objects ---
-                    with st.expander("Drill-down: objects"):
-                        scen_key = f"p5_scen_{lbl}_{idx}"
-                        t4_key = f"p5_t4_{lbl}_{idx}"
-                        lab_key = f"p5_lab_{lbl}_{idx}"
-                        for k, default in ((scen_key, []), (t4_key, []), (lab_key, [])):
-                            if k not in st.session_state:
-                                st.session_state[k] = default
-
-                        scenarios_all = sorted(
-                            df_improved["scenario_name"].dropna().astype(str).unique().tolist()
-                        )
-                        t4_all = sorted(
-                            df_improved["t4dataset_name"].dropna().astype(str).unique().tolist()
-                        )
-                        labels_all = (
-                            sorted(df_by_object_full["label"].dropna().astype(str).unique().tolist())
-                            if not df_by_object_full.empty
-                            else []
-                        )
-                        # Keep prior picks valid so Streamlit does not reset widgets when options refresh
-                        scenarios_opts = sorted(
-                            set(scenarios_all) | set(st.session_state.get(scen_key, []) or [])
-                        )
-                        t4_opts = sorted(set(t4_all) | set(st.session_state.get(t4_key, []) or []))
-                        labels_opts = sorted(
-                            set(labels_all) | set(st.session_state.get(lab_key, []) or [])
-                        )
-
-                        pr1, pr2 = st.columns(2)
-                        with pr1:
-                            if st.button(
-                                "Preset: top 5 degraded scenarios",
-                                key=f"p5_pre_scen_{lbl}_{idx}",
-                            ):
-                                if not df_improved.empty:
-                                    sa = (
-                                        df_improved.groupby("scenario_name", dropna=False)[
-                                            "degraded_cnt"
-                                        ]
-                                        .sum()
-                                        .sort_values(ascending=False)
-                                        .head(5)
-                                    )
-                                    st.session_state[scen_key] = [
-                                        str(x) for x in sa.index.tolist()
+                            if not df_obj_show.empty:
+                                ss = st.session_state.get(scen_key) or []
+                                if ss:
+                                    df_obj_show = df_obj_show[
+                                        df_obj_show["scenario_name"].astype(str).isin(ss)
                                     ]
-                                    st.rerun()
-                        fr_multiselect_key = f"p5_frkeys_{lbl}_{idx}"
-                        if fr_multiselect_key not in st.session_state:
-                            st.session_state[fr_multiselect_key] = []
-                        frame_key_labels = {}
-                        if not df_frame_sorted.empty:
-                            for _, rw in df_frame_sorted.head(40).iterrows():
-                                fk = f"{rw['t4dataset_id']}|{rw['frame_index']}"
-                                # Use scenario_name (not suite_name) for frame option labels
-                                frame_key_labels[fk] = (
-                                    f"{str(rw.get('scenario_name', ''))[:36]} | "
-                                    f"f{rw['frame_index']} | deg {int(rw['degraded_cnt'])}"
-                                )
-                        with pr2:
-                            if st.button(
-                                "Preset: top 10 degraded frames (object filter)",
-                                key=f"p5_pre_fr_{lbl}_{idx}",
-                            ):
-                                if frame_key_labels:
-                                    topk = list(frame_key_labels.keys())[:10]
-                                    st.session_state[fr_multiselect_key] = topk
-                                    st.rerun()
-
-                        colf1, colf2, colf3 = st.columns(3)
-                        with colf1:
-                            if scenarios_opts:
-                                st.multiselect(
-                                    "Filter scenario_name",
-                                    scenarios_opts,
-                                    key=scen_key,
-                                )
-                            else:
-                                st.caption("No scenarios.")
-                        with colf2:
-                            if t4_opts:
-                                st.multiselect(
-                                    "Filter t4dataset_name",
-                                    t4_opts,
-                                    key=t4_key,
-                                )
-                            else:
-                                st.caption("No t4dataset_name.")
-                        with colf3:
-                            if labels_opts:
-                                st.multiselect(
-                                    "Filter label",
-                                    labels_opts,
-                                    key=lab_key,
-                                )
-                            else:
-                                st.caption("No labels.")
-
-                        prev_fr = st.session_state.get(fr_multiselect_key) or []
-                        base_frame_keys = list(frame_key_labels.keys())
-                        for k in prev_fr:
-                            if k not in frame_key_labels:
-                                frame_key_labels[k] = f"(selected) frame {str(k).split('|')[-1]}"
-                        frame_opts_keys = base_frame_keys + [
-                            k for k in prev_fr if k not in base_frame_keys
-                        ]
-                        if frame_opts_keys:
-                            st.multiselect(
-                                "Limit objects to frames (optional)",
-                                options=frame_opts_keys,
-                                format_func=lambda k: frame_key_labels.get(k, k),
-                                key=fr_multiselect_key,
+                                tt = st.session_state.get(t4_key) or []
+                                if tt:
+                                    df_obj_show = df_obj_show[
+                                        df_obj_show["t4dataset_name"].astype(str).isin(tt)
+                                    ]
+                                ll = st.session_state.get(lab_key) or []
+                                if ll:
+                                    df_obj_show = df_obj_show[
+                                        df_obj_show["label"].astype(str).isin(ll)
+                                    ]
+                                fk_sel = st.session_state.get(fr_multiselect_key) or []
+                                if fk_sel:
+                                    fk_set = set(fk_sel)
+                                    df_obj_show = df_obj_show[
+                                        (
+                                            df_obj_show["t4dataset_id"].astype(str)
+                                            + "|"
+                                            + df_obj_show["frame_index"].astype(str)
+                                        ).isin(fk_set)
+                                    ]
+                                if change_type_filter != "all":
+                                    df_obj_show = df_obj_show[
+                                        df_obj_show["change_type"] == change_type_filter
+                                    ]
+                                if sort_obj == "degraded_priority_then_dist":
+                                    df_obj_show = df_obj_show.copy()
+                                    df_obj_show["_prio"] = df_obj_show["change_type"].map(
+                                        {
+                                            "degraded": 0,
+                                            "improved": 1,
+                                            "both_tp": 2,
+                                            "both_fn": 3,
+                                        }
+                                    )
+                                    df_obj_show = df_obj_show.sort_values(
+                                        by=["_prio", "dist_h"],
+                                        ascending=[True, True],
+                                    ).drop(columns=["_prio"], errors="ignore")
+                                elif sort_obj == "frame_then_uuid":
+                                    df_obj_show = df_obj_show.sort_values(
+                                        by=["t4dataset_id", "frame_index", "gt_uuid"]
+                                    )
+                                else:
+                                    df_obj_show = df_obj_show.sort_values(
+                                        by=["label", "dist_h", "t4dataset_id", "frame_index"]
+                                    )
+
+                            n_show = 200
+                            st.caption(
+                                f"Showing up to {n_show} rows; use **Download CSV** for the full filtered list."
                             )
-
-                        change_type_filter = st.selectbox(
-                            "Change type",
-                            ["degraded", "improved", "all", "both_tp", "both_fn"],
-                            key=f"change_type_{lbl}_{idx}",
-                            help="Filter objects by TP change between runs.",
-                        )
-                        sort_obj = st.selectbox(
-                            "Sort objects by",
-                            [
-                                "degraded_priority_then_dist",
-                                "frame_then_uuid",
-                                "label_then_dist",
-                            ],
-                            key=f"p5_sort_{lbl}_{idx}",
-                        )
-
-                        df_obj_show = (
-                            df_by_object_full.copy()
-                            if not df_by_object_full.empty
-                            else pd.DataFrame()
-                        )
-                        if not df_obj_show.empty:
-                            ss = st.session_state.get(scen_key) or []
-                            if ss:
-                                df_obj_show = df_obj_show[
-                                    df_obj_show["scenario_name"].astype(str).isin(ss)
-                                ]
-                            tt = st.session_state.get(t4_key) or []
-                            if tt:
-                                df_obj_show = df_obj_show[
-                                    df_obj_show["t4dataset_name"].astype(str).isin(tt)
-                                ]
-                            ll = st.session_state.get(lab_key) or []
-                            if ll:
-                                df_obj_show = df_obj_show[
-                                    df_obj_show["label"].astype(str).isin(ll)
-                                ]
-                            fk_sel = st.session_state.get(fr_multiselect_key) or []
-                            if fk_sel:
-                                fk_set = set(fk_sel)
-                                df_obj_show = df_obj_show[
-                                    (
-                                        df_obj_show["t4dataset_id"].astype(str)
-                                        + "|"
-                                        + df_obj_show["frame_index"].astype(str)
-                                    ).isin(fk_set)
-                                ]
-                            if change_type_filter != "all":
-                                df_obj_show = df_obj_show[
-                                    df_obj_show["change_type"] == change_type_filter
-                                ]
-                            if sort_obj == "degraded_priority_then_dist":
-                                df_obj_show = df_obj_show.copy()
-                                df_obj_show["_prio"] = df_obj_show["change_type"].map(
-                                    {
-                                        "degraded": 0,
-                                        "improved": 1,
-                                        "both_tp": 2,
-                                        "both_fn": 3,
-                                    }
+                            if not df_obj_show.empty:
+                                st.download_button(
+                                    label="Download filtered objects (CSV)",
+                                    data=df_obj_show.to_csv(index=False).encode("utf-8"),
+                                    file_name=f"perception_diff_{lbl}_vs_A_objects.csv",
+                                    mime="text/csv",
+                                    key=f"p5_dl_{lbl}_{idx}",
                                 )
-                                df_obj_show = df_obj_show.sort_values(
-                                    by=["_prio", "dist_h"],
-                                    ascending=[True, True],
-                                ).drop(columns=["_prio"], errors="ignore")
-                            elif sort_obj == "frame_then_uuid":
-                                df_obj_show = df_obj_show.sort_values(
-                                    by=["t4dataset_id", "frame_index", "gt_uuid"]
+                                st.dataframe(
+                                    df_obj_show.head(n_show),
+                                    width='stretch',
+                                    hide_index=True,
                                 )
                             else:
-                                df_obj_show = df_obj_show.sort_values(
-                                    by=["label", "dist_h", "t4dataset_id", "frame_index"]
-                                )
-
-                        n_show = 200
-                        st.caption(
-                            f"Showing up to {n_show} rows; use **Download CSV** for the full filtered list."
-                        )
-                        if not df_obj_show.empty:
-                            st.download_button(
-                                label="Download filtered objects (CSV)",
-                                data=df_obj_show.to_csv(index=False).encode("utf-8"),
-                                file_name=f"perception_diff_{lbl}_vs_A_objects.csv",
-                                mime="text/csv",
-                                key=f"p5_dl_{lbl}_{idx}",
-                            )
-                            st.dataframe(
-                                df_obj_show.head(n_show),
-                                width='stretch',
-                                hide_index=True,
-                            )
-                        else:
-                            st.caption("No objects match filters.")
-
-                    with st.expander("Full frame table (sort: degraded desc)"):
-                        if not df_frame_sorted.empty:
-                            st.dataframe(df_frame_sorted, width='stretch', hide_index=True)
-                        else:
-                            st.caption("No frame breakdown.")
-            else:
-                st.caption(f"Run {lbl} vs A: No data.")
-        except Exception as e:
-            st.error(f"Error (Run {lbl} vs A): {e}")
-        finally:
-            _pd_slot.empty()
-
-# =============================
-# Single mode: Frame / Object level — Where are the misses?
-# =============================
-if single_mode:
-    st.markdown(section_header_html("Frame / Object level: Where are the misses?"), unsafe_allow_html=True)
-    _fn_slot = st.empty()
-    _fn_slot.markdown(ds_spot_loading_markup("FN by frame & object"), unsafe_allow_html=True)
-    try:
-        with st.expander("FN by frame and by object", expanded=True):
-            query_fn_frame = f"""
-            SELECT
-                t4dataset_id,
-                frame_index,
-                COALESCE(MAX(CAST(scenario_name AS VARCHAR)), '') AS scenario_name,
-                COALESCE(MAX(CAST(suite_name AS VARCHAR)), '') AS suite_name,
-                COALESCE(MAX(CAST(t4dataset_name AS VARCHAR)), '') AS t4dataset_name,
-                COUNT(*) AS fn_cnt
-            FROM view_eval_flat
-            WHERE source = 'GT' AND status = 'FN' AND {filter_clause_base}
-            GROUP BY t4dataset_id, frame_index
-            ORDER BY fn_cnt DESC
-            """
-            df_fn_frame = con.execute(query_fn_frame).df()
-            query_fn_object = f"""
-            SELECT
-                t4dataset_id,
-                frame_index,
-                uuid,
-                COALESCE(CAST(label AS VARCHAR), '') AS label,
-                dist_h,
-                COALESCE(CAST(scenario_name AS VARCHAR), '') AS scenario_name,
-                COALESCE(CAST(suite_name AS VARCHAR), '') AS suite_name
-            FROM view_eval_flat
-            WHERE source = 'GT' AND status = 'FN' AND {filter_clause_base}
-            ORDER BY t4dataset_id, frame_index, uuid
-            """
-            df_fn_object = con.execute(query_fn_object).df()
-            if not df_fn_frame.empty:
-                st.markdown("**FN count by frame**")
-                st.download_button("Download FN by frame (CSV)", data=df_fn_frame.to_csv(index=False).encode("utf-8"), file_name="fn_by_frame.csv", mime="text/csv", key="dl_fn_frame")
-                st.dataframe(df_fn_frame, width='stretch', hide_index=True)
-            else:
-                st.caption("No FN by frame.")
-            if not df_fn_object.empty:
-                st.markdown("**FN objects**")
-                if len(df_fn_object) > 500:
-                    st.caption(f"Showing first 500 of {len(df_fn_object)} FN objects.")
-                    st.dataframe(df_fn_object.head(500), width='stretch', hide_index=True)
+                                st.caption("No objects match filters.")
+    
+                        with st.expander("Full frame table (sort: degraded desc)"):
+                            if not df_frame_sorted.empty:
+                                st.dataframe(df_frame_sorted, width='stretch', hide_index=True)
+                            else:
+                                st.caption("No frame breakdown.")
                 else:
-                    st.dataframe(df_fn_object, width='stretch', hide_index=True)
-            else:
-                st.caption("No FN objects.")
-    except Exception as e:
-        st.error(f"Error in FN by frame/object: {e}")
-    finally:
-        _fn_slot.empty()
-
-# =============================
-# Panel 6: Mean Error (single) / Mean Error Comparison (compare)
-# =============================
-st.divider()
-st.markdown(
-    section_header_html(
-        "Mean Error" + (" Comparison" if not single_mode else ""),
-        "Mean absolute error on TP matches (X/Y in m, Yaw in rad)."
-        + (" Compare mode: choose grouped bars or spider charts." if not single_mode else ""),
-    ),
-    unsafe_allow_html=True,
-)
-
-try:
-    sample_query = "SELECT * FROM view_eval_flat LIMIT 1"
-    sample_df = con.execute(sample_query).df()
-    has_error_cols = all(col in sample_df.columns for col in ['x_error', 'y_error', 'yaw_error'])
-except Exception:
-    has_error_cols = False
-
-if not has_error_cols:
-    st.info("Error columns (x_error, y_error, yaw_error) not found in data. Skipping error analysis.")
-else:
+                    st.caption(f"Run {lbl} vs A: No data.")
+            except Exception as e:
+                st.error(f"Error (Run {lbl} vs A): {e}")
+            finally:
+                _pd_slot.empty()
+    
+    # =============================
+    # Single mode: Frame / Object level — Where are the misses?
+    # =============================
     if single_mode:
+        ds_dlog("section: Frame_FN_misses_start")
+        st.markdown(section_header_html("Frame / Object level: Where are the misses?"), unsafe_allow_html=True)
+        _fn_slot = st.empty()
+        _fn_slot.markdown(ds_spot_loading_markup("FN by frame & object"), unsafe_allow_html=True)
         try:
-            with ds_spot_loading("Mean error"):
-                query = f"""
+            with st.expander("FN by frame and by object", expanded=True):
+                query_fn_frame = f"""
                 SELECT
-                    label,
-                    AVG(ABS(CAST(x_error AS DOUBLE))) FILTER (
-                        WHERE status = 'TP' AND x_error IS NOT NULL
-                    ) AS mean_abs_x_error,
-                    AVG(ABS(CAST(y_error AS DOUBLE))) FILTER (
-                        WHERE status = 'TP' AND y_error IS NOT NULL
-                    ) AS mean_abs_y_error,
-                    AVG(ABS(CAST(yaw_error AS DOUBLE))) FILTER (
-                        WHERE status = 'TP' AND yaw_error IS NOT NULL
-                    ) AS mean_abs_yaw_error
+                    t4dataset_id,
+                    frame_index,
+                    COALESCE(MAX(CAST(scenario_name AS VARCHAR)), '') AS scenario_name,
+                    COALESCE(MAX(CAST(suite_name AS VARCHAR)), '') AS suite_name,
+                    COALESCE(MAX(CAST(t4dataset_name AS VARCHAR)), '') AS t4dataset_name,
+                    COUNT(*) AS fn_cnt
                 FROM view_eval_flat
-                WHERE {filter_clause_base}
-                GROUP BY label
-                ORDER BY label
+                WHERE source = 'GT' AND status = 'FN' AND {filter_clause_base}
+                GROUP BY t4dataset_id, frame_index
+                ORDER BY fn_cnt DESC
                 """
-                df_error_base = con.execute(query).df()
-            if not df_error_base.empty:
-                fig = go.Figure()
-                fig.add_trace(go.Bar(
-                    x=df_error_base['label'],
-                    y=df_error_base['mean_abs_x_error'],
-                    name='X Error',
-                    marker_color=RUN_COLORS[0],
-                ))
-                fig.add_trace(go.Bar(
-                    x=df_error_base['label'],
-                    y=df_error_base['mean_abs_y_error'],
-                    name='Y Error',
-                    marker_color=RUN_COLORS[1],
-                ))
-                fig.add_trace(go.Bar(
-                    x=df_error_base['label'],
-                    y=df_error_base['mean_abs_yaw_error'],
-                    name='Yaw Error',
-                    marker_color=RUN_COLORS[2],
-                ))
-                apply_chart_theme(fig)
-                fig.update_layout(
-                    title=f"Mean Error within {max_eval_range} [m]",
-                    xaxis_title="Label",
-                    yaxis_title="Error [m] or [rad]",
-                    barmode='group'
-                )
-                st.plotly_chart(fig, width="stretch")
-            else:
-                st.info("No data available")
+                df_fn_frame = con.execute(query_fn_frame).df()
+                query_fn_object = f"""
+                SELECT
+                    t4dataset_id,
+                    frame_index,
+                    uuid,
+                    COALESCE(CAST(label AS VARCHAR), '') AS label,
+                    dist_h,
+                    COALESCE(CAST(scenario_name AS VARCHAR), '') AS scenario_name,
+                    COALESCE(CAST(suite_name AS VARCHAR), '') AS suite_name
+                FROM view_eval_flat
+                WHERE source = 'GT' AND status = 'FN' AND {filter_clause_base}
+                ORDER BY t4dataset_id, frame_index, uuid
+                """
+                df_fn_object = con.execute(query_fn_object).df()
+                if not df_fn_frame.empty:
+                    st.markdown("**FN count by frame**")
+                    st.download_button("Download FN by frame (CSV)", data=df_fn_frame.to_csv(index=False).encode("utf-8"), file_name="fn_by_frame.csv", mime="text/csv", key="dl_fn_frame")
+                    st.dataframe(df_fn_frame, width='stretch', hide_index=True)
+                else:
+                    st.caption("No FN by frame.")
+                if not df_fn_object.empty:
+                    st.markdown("**FN objects**")
+                    if len(df_fn_object) > 500:
+                        st.caption(f"Showing first 500 of {len(df_fn_object)} FN objects.")
+                        st.dataframe(df_fn_object.head(500), width='stretch', hide_index=True)
+                    else:
+                        st.dataframe(df_fn_object, width='stretch', hide_index=True)
+                else:
+                    st.caption("No FN objects.")
         except Exception as e:
-            st.error(f"Error: {e}")
+            st.error(f"Error in FN by frame/object: {e}")
+        finally:
+            _fn_slot.empty()
+    
+    # =============================
+    # Panel 6: Mean Error (single) / Mean Error Comparison (compare)
+    # =============================
+    ds_dlog("section: Panel6_Mean_Error_start")
+    st.divider()
+    st.markdown(
+        section_header_html(
+            "Mean Error" + (" Comparison" if not single_mode else ""),
+            "Mean absolute error on TP matches (X/Y in m, Yaw in rad)."
+            + (" Compare mode: choose grouped bars or spider charts." if not single_mode else ""),
+        ),
+        unsafe_allow_html=True,
+    )
+    
+    try:
+        sample_query = "SELECT * FROM view_eval_flat LIMIT 1"
+        sample_df = con.execute(sample_query).df()
+        has_error_cols = all(col in sample_df.columns for col in ['x_error', 'y_error', 'yaw_error'])
+    except Exception:
+        has_error_cols = False
+    
+    if not has_error_cols:
+        st.info("Error columns (x_error, y_error, yaw_error) not found in data. Skipping error analysis.")
     else:
-        try:
-            with ds_spot_loading("Mean error"):
-                dfs_err = []
-                for i in range(len(runs)):
-                    fc = build_filter_clause(filters_list[i])
-                    q = f"""
+        if single_mode:
+            try:
+                with ds_spot_loading("Mean error"):
+                    query = f"""
                     SELECT
                         label,
-                        AVG(ABS(CAST(x_error AS DOUBLE))) FILTER (WHERE status = 'TP' AND x_error IS NOT NULL) AS mean_abs_x_error,
-                        AVG(ABS(CAST(y_error AS DOUBLE))) FILTER (WHERE status = 'TP' AND y_error IS NOT NULL) AS mean_abs_y_error,
-                        AVG(ABS(CAST(yaw_error AS DOUBLE))) FILTER (WHERE status = 'TP' AND yaw_error IS NOT NULL) AS mean_abs_yaw_error
-                    FROM {_flat_view(i)}
-                    WHERE {fc}
+                        AVG(ABS(CAST(x_error AS DOUBLE))) FILTER (
+                            WHERE status = 'TP' AND x_error IS NOT NULL
+                        ) AS mean_abs_x_error,
+                        AVG(ABS(CAST(y_error AS DOUBLE))) FILTER (
+                            WHERE status = 'TP' AND y_error IS NOT NULL
+                        ) AS mean_abs_y_error,
+                        AVG(ABS(CAST(yaw_error AS DOUBLE))) FILTER (
+                            WHERE status = 'TP' AND yaw_error IS NOT NULL
+                        ) AS mean_abs_yaw_error
+                    FROM view_eval_flat
+                    WHERE {filter_clause_base}
                     GROUP BY label
                     ORDER BY label
                     """
-                    df_i = con.execute(q).df()
-                    df_i["run"] = run_labels_list[i]
-                    dfs_err.append(df_i)
-                df_err_melt = pd.concat(dfs_err, ignore_index=True)
-            if not df_err_melt.empty:
-                mean_err_viz = st.radio(
-                    "Mean error chart style",
-                    options=["Spider chart (X, Y & Yaw)", "Grouped bar"],
-                    index=0,
-                    horizontal=True,
-                    key="mean_err_compare_viz",
-                )
-                if mean_err_viz == "Grouped bar":
-                    for err_type, col in [
-                        ("X Error", "mean_abs_x_error"),
-                        ("Y Error", "mean_abs_y_error"),
-                        ("Yaw Error", "mean_abs_yaw_error"),
-                    ]:
-                        fig = px.bar(
-                            df_err_melt,
-                            x="label",
-                            y=col,
-                            color="run",
-                            barmode="group",
-                            title=f"Mean {err_type} within {max_eval_range} [m] by run",
-                            labels={"label": "Label", col: err_type, "run": "Run"},
-                            color_discrete_sequence=RUN_COLORS,
-                        )
-                        apply_chart_theme(fig)
-                        st.plotly_chart(fig, width="stretch")
+                    df_error_base = con.execute(query).df()
+                if not df_error_base.empty:
+                    fig = go.Figure()
+                    fig.add_trace(go.Bar(
+                        x=df_error_base['label'],
+                        y=df_error_base['mean_abs_x_error'],
+                        name='X Error',
+                        marker_color=RUN_COLORS[0],
+                    ))
+                    fig.add_trace(go.Bar(
+                        x=df_error_base['label'],
+                        y=df_error_base['mean_abs_y_error'],
+                        name='Y Error',
+                        marker_color=RUN_COLORS[1],
+                    ))
+                    fig.add_trace(go.Bar(
+                        x=df_error_base['label'],
+                        y=df_error_base['mean_abs_yaw_error'],
+                        name='Yaw Error',
+                        marker_color=RUN_COLORS[2],
+                    ))
+                    apply_chart_theme(fig)
+                    fig.update_layout(
+                        title=f"Mean Error within {max_eval_range} [m]",
+                        xaxis_title="Label",
+                        yaxis_title="Error [m] or [rad]",
+                        barmode='group'
+                    )
+                    st.plotly_chart(fig, width="stretch")
                 else:
-                    st.caption(
-                        f"Three spiders: mean |error| per label per run (TP only), within **{max_eval_range} m** "
-                        "(same as sidebar max range)."
+                    st.info("No data available")
+            except Exception as e:
+                st.error(f"Error: {e}")
+        else:
+            try:
+                with ds_spot_loading("Mean error"):
+                    dfs_err = []
+                    for i in range(len(runs)):
+                        fc = build_filter_clause(filters_list[i])
+                        q = f"""
+                        SELECT
+                            label,
+                            AVG(ABS(CAST(x_error AS DOUBLE))) FILTER (WHERE status = 'TP' AND x_error IS NOT NULL) AS mean_abs_x_error,
+                            AVG(ABS(CAST(y_error AS DOUBLE))) FILTER (WHERE status = 'TP' AND y_error IS NOT NULL) AS mean_abs_y_error,
+                            AVG(ABS(CAST(yaw_error AS DOUBLE))) FILTER (WHERE status = 'TP' AND yaw_error IS NOT NULL) AS mean_abs_yaw_error
+                        FROM {_flat_view(i)}
+                        WHERE {fc}
+                        GROUP BY label
+                        ORDER BY label
+                        """
+                        df_i = con.execute(q).df()
+                        df_i["run"] = run_labels_list[i]
+                        dfs_err.append(df_i)
+                    df_err_melt = pd.concat(dfs_err, ignore_index=True)
+                if not df_err_melt.empty:
+                    mean_err_viz = st.radio(
+                        "Mean error chart style",
+                        options=["Spider chart (X, Y & Yaw)", "Grouped bar"],
+                        index=0,
+                        horizontal=True,
+                        key="mean_err_compare_viz",
                     )
-                    cats = sorted(df_err_melt["label"].astype(str).unique())
-                    if len(cats) > 16:
-                        st.caption("Spider charts work best with ≤16 labels; many classes may look crowded.")
-                    rcols = st.columns(3)
-                    err_specs = [
-                        (
-                            f"Mean |x error| (within {max_eval_range} m)",
-                            "mean_abs_x_error",
-                            "Mean |x error| (m)",
-                            ".3f",
-                        ),
-                        (
-                            f"Mean |y error| (within {max_eval_range} m)",
-                            "mean_abs_y_error",
-                            "Mean |y error| (m)",
-                            ".3f",
-                        ),
-                        (
-                            f"Mean |yaw error| (within {max_eval_range} m)",
-                            "mean_abs_yaw_error",
-                            "Mean |yaw error| (rad)",
-                            ".4f",
-                        ),
-                    ]
-                    for ci, (chart_title, col, hover_lbl, tfmt) in enumerate(err_specs):
-                        fig_r = _scalar_metric_spider_compare(
-                            df_err_melt,
-                            cats,
-                            chart_title,
-                            run_labels_list,
-                            col,
-                            hover_lbl,
-                            height=400,
-                            tickformat=tfmt,
+                    if mean_err_viz == "Grouped bar":
+                        for err_type, col in [
+                            ("X Error", "mean_abs_x_error"),
+                            ("Y Error", "mean_abs_y_error"),
+                            ("Yaw Error", "mean_abs_yaw_error"),
+                        ]:
+                            fig = px.bar(
+                                df_err_melt,
+                                x="label",
+                                y=col,
+                                color="run",
+                                barmode="group",
+                                title=f"Mean {err_type} within {max_eval_range} [m] by run",
+                                labels={"label": "Label", col: err_type, "run": "Run"},
+                                color_discrete_sequence=RUN_COLORS,
+                            )
+                            apply_chart_theme(fig)
+                            st.plotly_chart(fig, width="stretch")
+                    else:
+                        st.caption(
+                            f"Three spiders: mean |error| per label per run (TP only), within **{max_eval_range} m** "
+                            "(same as sidebar max range)."
                         )
-                        with rcols[ci]:
-                            st.plotly_chart(fig_r, width='stretch')
-            else:
-                st.info("No data available")
-        except Exception as e:
-            st.error(f"Error: {e}")
-
-        st.markdown(section_header_html("Difference of mean absolute error (each run − Baseline A)"), unsafe_allow_html=True)
-        for idx in range(1, len(runs)):
-            lbl = run_labels_list[idx]
-            _med_slot = st.empty()
-            _med_slot.markdown(ds_spot_loading_markup(f"Mean error diff · run {lbl}"), unsafe_allow_html=True)
-            try:
-                fc_c = build_filter_clause(filters_list[idx])
-                query = f"""
-                WITH topic_a AS (
-                    SELECT label,
-                        AVG(ABS(x_error)) FILTER (WHERE status = 'TP') AS x_a,
-                        AVG(ABS(y_error)) FILTER (WHERE status = 'TP') AS y_a,
-                        AVG(ABS(yaw_error)) FILTER (WHERE status = 'TP') AS yaw_a
-                    FROM view_eval_flat
-                    WHERE {filter_clause_base}
-                    GROUP BY label
-                ),
-                topic_c AS (
-                    SELECT label,
-                        AVG(ABS(x_error)) FILTER (WHERE status = 'TP') AS x_c,
-                        AVG(ABS(y_error)) FILTER (WHERE status = 'TP') AS y_c,
-                        AVG(ABS(yaw_error)) FILTER (WHERE status = 'TP') AS yaw_c
-                    FROM {_flat_view(idx)}
-                    WHERE {fc_c}
-                    GROUP BY label
-                )
-                SELECT a.label,
-                    (c.x_c - a.x_a) AS x_diff,
-                    (c.y_c - a.y_a) AS y_diff,
-                    (c.yaw_c - a.yaw_a) AS yaw_diff
-                FROM topic_a a
-                JOIN topic_c c USING (label)
-                ORDER BY label
-                """
-                df_ed = con.execute(query).df()
-                if not df_ed.empty:
-                    with st.expander(f"Run {lbl} − A", expanded=(len(runs) == 2)):
-                        fig = go.Figure()
-                        fig.add_trace(go.Bar(x=df_ed["label"], y=df_ed["x_diff"], name="X Diff", marker_color=RUN_COLORS[0]))
-                        fig.add_trace(go.Bar(x=df_ed["label"], y=df_ed["y_diff"], name="Y Diff", marker_color=RUN_COLORS[1]))
-                        fig.add_trace(go.Bar(x=df_ed["label"], y=df_ed["yaw_diff"], name="Yaw Diff", marker_color=RUN_COLORS[2]))
-                        apply_chart_theme(fig)
-                        fig.update_layout(title=f"Error diff ({lbl} − A) within {max_eval_range} [m]", xaxis_title="Label", yaxis_title="Error Difference [m] or [rad]", barmode="group")
-                        st.plotly_chart(fig, width="stretch")
+                        cats = sorted(df_err_melt["label"].astype(str).unique())
+                        if len(cats) > 16:
+                            st.caption("Spider charts work best with ≤16 labels; many classes may look crowded.")
+                        rcols = st.columns(3)
+                        err_specs = [
+                            (
+                                f"Mean |x error| (within {max_eval_range} m)",
+                                "mean_abs_x_error",
+                                "Mean |x error| (m)",
+                                ".3f",
+                            ),
+                            (
+                                f"Mean |y error| (within {max_eval_range} m)",
+                                "mean_abs_y_error",
+                                "Mean |y error| (m)",
+                                ".3f",
+                            ),
+                            (
+                                f"Mean |yaw error| (within {max_eval_range} m)",
+                                "mean_abs_yaw_error",
+                                "Mean |yaw error| (rad)",
+                                ".4f",
+                            ),
+                        ]
+                        for ci, (chart_title, col, hover_lbl, tfmt) in enumerate(err_specs):
+                            fig_r = _scalar_metric_spider_compare(
+                                df_err_melt,
+                                cats,
+                                chart_title,
+                                run_labels_list,
+                                col,
+                                hover_lbl,
+                                height=400,
+                                tickformat=tfmt,
+                            )
+                            with rcols[ci]:
+                                st.plotly_chart(fig_r, width='stretch')
+                else:
+                    st.info("No data available")
             except Exception as e:
-                st.error(f"Error (Run {lbl} − A): {e}")
-            finally:
-                _med_slot.empty()
+                st.error(f"Error: {e}")
+    
+            st.markdown(section_header_html("Difference of mean absolute error (each run − Baseline A)"), unsafe_allow_html=True)
+            for idx in range(1, len(runs)):
+                lbl = run_labels_list[idx]
+                _med_slot = st.empty()
+                _med_slot.markdown(ds_spot_loading_markup(f"Mean error diff · run {lbl}"), unsafe_allow_html=True)
+                try:
+                    fc_c = build_filter_clause(filters_list[idx])
+                    query = f"""
+                    WITH topic_a AS (
+                        SELECT label,
+                            AVG(ABS(x_error)) FILTER (WHERE status = 'TP') AS x_a,
+                            AVG(ABS(y_error)) FILTER (WHERE status = 'TP') AS y_a,
+                            AVG(ABS(yaw_error)) FILTER (WHERE status = 'TP') AS yaw_a
+                        FROM view_eval_flat
+                        WHERE {filter_clause_base}
+                        GROUP BY label
+                    ),
+                    topic_c AS (
+                        SELECT label,
+                            AVG(ABS(x_error)) FILTER (WHERE status = 'TP') AS x_c,
+                            AVG(ABS(y_error)) FILTER (WHERE status = 'TP') AS y_c,
+                            AVG(ABS(yaw_error)) FILTER (WHERE status = 'TP') AS yaw_c
+                        FROM {_flat_view(idx)}
+                        WHERE {fc_c}
+                        GROUP BY label
+                    )
+                    SELECT a.label,
+                        (c.x_c - a.x_a) AS x_diff,
+                        (c.y_c - a.y_a) AS y_diff,
+                        (c.yaw_c - a.yaw_a) AS yaw_diff
+                    FROM topic_a a
+                    JOIN topic_c c USING (label)
+                    ORDER BY label
+                    """
+                    df_ed = con.execute(query).df()
+                    if not df_ed.empty:
+                        with st.expander(f"Run {lbl} − A", expanded=(len(runs) == 2)):
+                            fig = go.Figure()
+                            fig.add_trace(go.Bar(x=df_ed["label"], y=df_ed["x_diff"], name="X Diff", marker_color=RUN_COLORS[0]))
+                            fig.add_trace(go.Bar(x=df_ed["label"], y=df_ed["y_diff"], name="Y Diff", marker_color=RUN_COLORS[1]))
+                            fig.add_trace(go.Bar(x=df_ed["label"], y=df_ed["yaw_diff"], name="Yaw Diff", marker_color=RUN_COLORS[2]))
+                            apply_chart_theme(fig)
+                            fig.update_layout(title=f"Error diff ({lbl} − A) within {max_eval_range} [m]", xaxis_title="Label", yaxis_title="Error Difference [m] or [rad]", barmode="group")
+                            st.plotly_chart(fig, width="stretch")
+                except Exception as e:
+                    st.error(f"Error (Run {lbl} − A): {e}")
+                finally:
+                    _med_slot.empty()
+    
+    ds_dlog("main_content_try_exit_ok")
+    ds_debug_log_memory("main_content_end")
+
+except Exception as _e_ds_main:
+    ds_debug_log_exception("detection_stats_main_try", _e_ds_main)
+    raise
 
-_ds_loading_banner.empty()
+finally:
+    try:
+        ds_debug_render_expander(st.session_state)
+    except Exception as _e_dbg_exp:
+        ds_debug_log_exception("ds_debug_render_expander", _e_dbg_exp)
+    ds_dlog("main_content_finally_banner_clear")
+    _ds_loading_banner.empty()
+    ds_dlog("detection_stats_script_run_complete")
diff --git a/evaluation_dashboard_app/pages/4_Bounding_Box_Viewer.py b/evaluation_dashboard_app/pages/4_Bounding_Box_Viewer.py
index 8df5009..1d36567 100644
--- a/evaluation_dashboard_app/pages/4_Bounding_Box_Viewer.py
+++ b/evaluation_dashboard_app/pages/4_Bounding_Box_Viewer.py
@@ -1,5 +1,8 @@
+import html
 import duckdb
+import requests
 import streamlit as st
+import streamlit.components.v1 as components
 import plotly.graph_objects as go
 import plotly.express as px
 import numpy as np
@@ -12,6 +15,17 @@
 from lib.parquet_schema import schema_flags
 from lib.page_chrome import inject_app_page_styles, render_loaded_data_section, render_page_hero
 from lib.ui.bounding_box_viewer_ui import bev_overlay_line_and_status_legend_markup, bev_status_legend_markup
+from lib.t4_dataset_embed import t4_share_query_params
+from lib.t4_three_layers import resolve_t4_dataset_id, resolve_t4_scenario
+from lib.t4_visualizer_client import (
+    DEFAULT_BASE_URL,
+    ENV_BASE_URL,
+    RenderRequest,
+    TargetObjectIn,
+    T4VisualizerClient,
+    T4VisualizerError,
+    target_object_from_gt_row,
+)
 
 st.set_page_config(
     layout="wide",
@@ -311,6 +325,36 @@ def list_parquets_in_run(run_path) -> List[str]:
     )
     compare_view_mode = "overlay" if "Overlay" in compare_view_mode else "side_by_side"
 
+# --- T4 visualizer (base URL + preview mode in sidebar)
+with st.sidebar:
+    st.markdown("##### T4 visualizer")
+    st.caption("Uses **GET /datasets/{id}/availability** first; preview runs only if the server reports the dataset is available.")
+    if "bbox_t4_base_url" not in st.session_state:  # seed only when absent, so an existing value is never overwritten
+        st.session_state["bbox_t4_base_url"] = (
+            (os.environ.get(ENV_BASE_URL) or DEFAULT_BASE_URL).strip() or DEFAULT_BASE_URL  # env var first; unset/blank falls back to the default
+        )
+    st.text_input(
+        "T4 server base URL",
+        key="bbox_t4_base_url",
+        help=f"Default from env `{ENV_BASE_URL}`; needs **GET /render/html** (iframe) and **POST /render** (PNG mode).",
+    )
+    _t4_mode = st.radio(
+        "T4 preview",
+        ["html_iframe", "post_png"],
+        format_func=lambda m: (
+            "HTML iframe (/render/html)" if m == "html_iframe" else "POST /render (PNGs here)"
+        ),
+        key="bbox_t4_preview_mode",
+        horizontal=True,
+    )
+    if _t4_mode == "post_png":  # extra render options are shown only for the POST /render mode
+        _t4p1, _t4p2 = st.columns(2)
+        with _t4p1:
+            st.checkbox("Crop cameras", value=True, key="bbox_t4_crop_cameras")
+            st.checkbox("Show dataset annotations", value=True, key="bbox_t4_show_ann")
+        with _t4p2:
+            st.checkbox("Draw GT rows as target boxes", value=True, key="bbox_t4_overlay_gt")
+
 
 # ----------------------------
 # Build query safely & load data
@@ -329,9 +373,14 @@ def list_parquets_in_run(run_path) -> List[str]:
     params.extend(selected_visibility)
 
 select_extras = (", " + ", ".join(hover_extra_cols)) if hover_extra_cols else ""
+# Optional T4 columns (z/height overlay; dataset/scenario per row), skipping any already in hover_extra_cols
+_geom_for_t4 = [c for c in ("z", "height") if c in cols and c not in hover_extra_cols]
+_geom_select = (", " + ", ".join(_geom_for_t4)) if _geom_for_t4 else ""
+_t4_meta_cols = [c for c in ("t4dataset_id", "t4dataset_name", "scenario_name") if c in cols and c not in hover_extra_cols]
+_t4_meta_select = (", " + ", ".join(_t4_meta_cols)) if _t4_meta_cols else ""
 sql = f"""
 SELECT frame_index, x, y, length, width, yaw, label, topic_name, source, status, uuid
-{select_vis}{select_extras}
+{select_vis}{select_extras}{_geom_select}{_t4_meta_select}
 FROM parquet_scan(?)
 WHERE {" AND ".join(where)}
 ORDER BY frame_index
@@ -447,6 +496,257 @@ def get_color(source, status): return color_map.get((source, status), "#999999")
 with k4: st.metric("TP (EST)", tp_est_count)
 with k5: st.metric("TPR", f"{tpr_frame:.2%}" if tpr_frame is not None else "—")
 
+# ----------------------------
+# T4 visualizer (HTTP server): camera preview for current frame (HTML iframe or POST /render PNGs)
+# ----------------------------
+def _bbox_t4_request_key(
+    ds: str,
+    sc: str,
+    frame_idx: int,
+    base_url: str,
+    crop: bool,
+    show_ann: bool,
+    overlay_gt: bool,
+) -> Tuple[Any, ...]:
+    """Build the hashable key identifying one POST /render request.
+
+    Every argument is coerced to a plain ``str`` / ``int`` / ``bool`` and any
+    trailing ``/`` is stripped from ``base_url``; the result is compared with the
+    success / error keys kept in session state to decide whether to fetch again.
+    """
+    dataset, scenario, frame_no = str(ds), str(sc), int(frame_idx)
+    server = str(base_url).rstrip("/")
+    return (dataset, scenario, frame_no, server, bool(crop), bool(show_ann), bool(overlay_gt))
+
+
+_t4_preview_mode = st.session_state.get("bbox_t4_preview_mode", "html_iframe")
+
+base_url_t4 = (st.session_state.get("bbox_t4_base_url") or "").strip() or DEFAULT_BASE_URL
+
+_ds_t4 = resolve_t4_dataset_id(df_frame)
+if not _ds_t4 and selected_t4dataset is not None:
+    _ds_t4 = str(selected_t4dataset)
+_sc_t4 = resolve_t4_scenario(df_frame, selected_scenario)
+
+if not _ds_t4:
+    for _k in (
+        "bbox_t4_last_images",
+        "bbox_t4_last_meta",
+        "bbox_t4_success_key",
+        "bbox_t4_error_key",
+        "bbox_t4_error_msg",
+        "bbox_t4_availability",
+    ):
+        st.session_state.pop(_k, None)
+    st.caption("T4 camera preview is not available for this scene.")
+    with st.expander("Details", expanded=False):
+        st.markdown(
+            "Needs parquet **t4dataset_id** or **t4dataset_name** (or **t4dataset_name** in the sidebar when "
+            "multiple datasets exist). "
+            "The Tier4 HTTP visualizer (`t4-server`) must serve that dataset. "
+            f"Set **T4 server base URL** in the sidebar or `{ENV_BASE_URL}`."
+        )
+else:
+    _t4_avail_cache_key = f"{base_url_t4.rstrip('/')}|{_ds_t4}"
+    _cached_av = st.session_state.get("bbox_t4_availability")
+    _need_avail_fetch = _cached_av is None or _cached_av.get("cache_key") != _t4_avail_cache_key
+    if _need_avail_fetch:
+        try:
+            with st.spinner("Checking T4 dataset on the server…"):
+                _av_client = T4VisualizerClient(base_url=base_url_t4, timeout=30.0)
+                _av_data = _av_client.dataset_availability(_ds_t4)
+            st.session_state["bbox_t4_availability"] = {
+                "cache_key": _t4_avail_cache_key,
+                "ok": True,
+                "available": bool(_av_data.get("available")),
+                "data": _av_data,
+                "error": None,
+            }
+        except T4VisualizerError as ex:
+            st.session_state["bbox_t4_availability"] = {
+                "cache_key": _t4_avail_cache_key,
+                "ok": False,
+                "available": False,
+                "data": None,
+                "error": f"T4 server error ({ex.status_code}): {ex}",
+            }
+        except (OSError, requests.RequestException) as ex:
+            st.session_state["bbox_t4_availability"] = {
+                "cache_key": _t4_avail_cache_key,
+                "ok": False,
+                "available": False,
+                "data": None,
+                "error": f"Network error: {ex}",
+            }
+        except Exception as ex:
+            st.session_state["bbox_t4_availability"] = {
+                "cache_key": _t4_avail_cache_key,
+                "ok": False,
+                "available": False,
+                "data": None,
+                "error": f"Availability check failed: {ex}",
+            }
+
+    _av = st.session_state.get("bbox_t4_availability") or {}
+
+    if not _av.get("ok"):
+        st.caption("T4 preview skipped — could not verify dataset on the visualizer server.")
+        with st.expander("Details", expanded=False):
+            st.markdown(_av.get("error") or "Unknown error.")
+    elif not _av.get("available"):
+        st.caption("T4 preview skipped — this dataset is not on the visualizer server host.")
+        with st.expander("Details", expanded=False):
+            _d = _av.get("data")
+            if isinstance(_d, dict) and _d:
+                st.json(_d)
+            else:
+                st.markdown(
+                    "The server reported **available: false** (no local dataset path for this id on the machine "
+                    "running `t4-server`)."
+                )
+    else:
+        _q_three = t4_share_query_params(_ds_t4, _sc_t4, int(frame))
+        _viewer_three_url = f"{base_url_t4.rstrip('/')}/viewer/three?{_q_three}"
+        st.caption("**3D viewer** (Three.js, GT / pred / matched layers) lives on a dedicated page.")
+        c3d_a, c3d_b = st.columns([1, 2])
+        with c3d_a:
+            st.page_link("pages/5_T4_3D_Viewer.py", label="Open T4 3D Viewer", icon="🧊")
+        with c3d_b:
+            st.markdown(f"[Open `/viewer/three` in new tab]({_viewer_three_url})")
+
+    if not _av.get("ok") or not _av.get("available"):
+        pass
+    elif _t4_preview_mode == "html_iframe":
+        _q = t4_share_query_params(_ds_t4, _sc_t4, int(frame))
+        _render_html_url = f"{base_url_t4.rstrip('/')}/render/html?{_q}"
+        st.markdown(f"[Open in new tab]({_render_html_url})")
+        _iframe_h = 900
+        # Iframe shell: neutral gray while the document loads (avoid #141418 — reads as a black box for ~2s until
+        # the large /render/html response paints; inner page still sets its own dark background).
+        components.html(
+            f'<iframe src="{html.escape(_render_html_url, quote=True)}" '
+            f'width="100%" height="{_iframe_h}" style="border:none;border-radius:8px;background:#e2e8f0" '
+            f'loading="lazy" title="T4 camera render" referrerpolicy="no-referrer-when-downgrade"></iframe>',
+            height=_iframe_h + 24,
+            scrolling=True,
+        )
+    elif not _sc_t4:
+        st.caption("POST /render mode needs **scenario_name** (sidebar or parquet) for this scene.")
+        with st.expander("Details", expanded=False):
+            st.markdown(
+                "Pick a **Scenario name** in the sidebar or ensure parquet includes **scenario_name**. "
+                "Alternatively switch to **HTML iframe** mode if the server accepts an empty scenario for your dataset."
+            )
+    else:
+        t4_crop = bool(st.session_state.get("bbox_t4_crop_cameras", True))
+        t4_show_ann = bool(st.session_state.get("bbox_t4_show_ann", True))
+        t4_overlay_gt = bool(st.session_state.get("bbox_t4_overlay_gt", True))
+
+        _req_key = _bbox_t4_request_key(
+            _ds_t4,
+            _sc_t4,
+            int(frame),
+            base_url_t4,
+            t4_crop,
+            t4_show_ann,
+            t4_overlay_gt,
+        )
+        _ok_key = st.session_state.get("bbox_t4_success_key")
+        _bad_key = st.session_state.get("bbox_t4_error_key")
+
+        _should_fetch = _req_key != _ok_key and _req_key != _bad_key
+
+        if _should_fetch:
+            try:
+                with st.spinner("Loading T4 camera renders… (usually ~2 seconds)"):
+                    client = T4VisualizerClient(
+                        base_url=base_url_t4,
+                        timeout=120.0,
+                    )
+                    targets = []
+                    if t4_overlay_gt:
+                        for _, row in df_frame[df_frame["source"] == "GT"].iterrows():
+                            d = target_object_from_gt_row(row.to_dict())
+                            targets.append(TargetObjectIn(**d))
+                    req = RenderRequest(
+                        t4dataset_id=_ds_t4,
+                        scenario_name=_sc_t4,
+                        frame_index=int(frame),
+                        target_objects=targets,
+                        crop_cameras=t4_crop,
+                        show_annotations=t4_show_ann,
+                    )
+                    t4_res = client.render(req)
+                    _imgs = t4_res.decode_all_images()
+                if not _imgs:
+                    st.session_state.pop("bbox_t4_last_images", None)
+                    st.session_state.pop("bbox_t4_last_meta", None)
+                    st.session_state["bbox_t4_error_key"] = _req_key
+                    st.session_state["bbox_t4_error_msg"] = (
+                        "T4 server returned no camera images for this frame. "
+                        "Check that the dataset and scenario exist on the server and the frame index is valid."
+                    )
+                    st.session_state.pop("bbox_t4_success_key", None)
+                else:
+                    st.session_state["bbox_t4_last_images"] = _imgs
+                    st.session_state["bbox_t4_last_meta"] = {
+                        "sample_token": t4_res.sample_token,
+                        "timestamp_us": t4_res.timestamp_us,
+                        "frame_index": int(frame),
+                        "t4dataset_id": _ds_t4,
+                        "scenario_name": _sc_t4,
+                    }
+                    st.session_state["bbox_t4_success_key"] = _req_key
+                    st.session_state.pop("bbox_t4_error_key", None)
+                    st.session_state.pop("bbox_t4_error_msg", None)
+            except T4VisualizerError as ex:
+                st.session_state.pop("bbox_t4_last_images", None)
+                st.session_state.pop("bbox_t4_last_meta", None)
+                st.session_state.pop("bbox_t4_success_key", None)
+                st.session_state["bbox_t4_error_key"] = _req_key
+                st.session_state["bbox_t4_error_msg"] = f"T4 server error ({ex.status_code}): {ex}"
+            except (OSError, requests.RequestException) as ex:
+                st.session_state.pop("bbox_t4_last_images", None)
+                st.session_state.pop("bbox_t4_last_meta", None)
+                st.session_state.pop("bbox_t4_success_key", None)
+                st.session_state["bbox_t4_error_key"] = _req_key
+                st.session_state["bbox_t4_error_msg"] = f"Network error: {ex}"
+            except Exception as ex:
+                st.session_state.pop("bbox_t4_last_images", None)
+                st.session_state.pop("bbox_t4_last_meta", None)
+                st.session_state.pop("bbox_t4_success_key", None)
+                st.session_state["bbox_t4_error_key"] = _req_key
+                st.session_state["bbox_t4_error_msg"] = f"T4 render failed: {ex}"
+
+        _meta = st.session_state.get("bbox_t4_last_meta")
+        _imgs = st.session_state.get("bbox_t4_last_images")
+        _show_err = st.session_state.get("bbox_t4_error_msg")
+
+        st.caption(
+            f"**Request:** t4dataset_id `{_ds_t4}` · scenario_name `{_sc_t4}` · frame_index `{frame}`"
+        )
+        if _req_key == st.session_state.get("bbox_t4_error_key") and _show_err:
+            st.caption("T4 camera preview could not be loaded.")
+            with st.expander("Details", expanded=False):
+                st.caption(
+                    f"t4dataset_id `{_ds_t4}` · scenario_name `{_sc_t4}` · frame_index `{frame}` · "
+                    f"server `{base_url_t4}`"
+                )
+                st.markdown(_show_err)
+        elif _meta and _imgs:
+            st.caption(
+                f"**sample_token** `{_meta.get('sample_token', '')}` · "
+                f"**timestamp_us** `{_meta.get('timestamp_us', '')}`"
+            )
+            _nc = min(3, max(1, len(_imgs)))
+            for _row_start in range(0, len(_imgs), _nc):
+                _cols_img = st.columns(_nc)
+                for _j, _k in enumerate(range(_row_start, min(_row_start + _nc, len(_imgs)))):
+                    _lbl, _png = _imgs[_k]
+                    with _cols_img[_j]:
+                        st.caption(_lbl)
+                        st.image(_png, use_container_width=True)
+
 # ----------------------------
 # Quick view: switch between "All (comparison)" and single-run view
 # ----------------------------
diff --git a/evaluation_dashboard_app/pages/5_T4_3D_Viewer.py b/evaluation_dashboard_app/pages/5_T4_3D_Viewer.py
new file mode 100644
index 0000000..4334181
--- /dev/null
+++ b/evaluation_dashboard_app/pages/5_T4_3D_Viewer.py
@@ -0,0 +1,494 @@
+"""T4 dataset Three.js viewer: GT / prediction / matched 3D boxes via postMessage to `/viewer/three`."""
+
+import duckdb
+import requests
+import streamlit as st
+import numpy as np
+import pandas as pd
+import os
+from pathlib import Path
+from typing import Any, List
+
+from lib.path_utils import path_display
+from lib.page_chrome import inject_app_page_styles, render_loaded_data_section, render_page_hero
+from lib.t4_dataset_embed import t4_share_query_params
+from lib.t4_three_layers import (
+    build_three_layer_payload_all_frames,
+    infer_external_bbox_alignment_query_params,
+    render_t4_three_js_embed,
+    resolve_t4_dataset_id,
+    resolve_t4_scenario,
+)
+from lib.t4_visualizer_client import (
+    DEFAULT_BASE_URL,
+    ENV_BASE_URL,
+    T4VisualizerClient,
+    T4VisualizerError,
+)
+
+st.set_page_config(
+    layout="wide",
+    page_title="T4 3D Viewer",
+    page_icon="🧊",
+    initial_sidebar_state="expanded",
+)
+inject_app_page_styles()
+
+# =============================
+# Session state from Overview (run path)
+# =============================
+if "runA" not in st.session_state:
+    st.warning("Please load data from the **Overview** page first (select mode and run(s)).")
+    st.stop()
+
+runA = st.session_state["runA"]
+mode = st.session_state.get("mode", "Single Mode")
+if mode == "Compare Mode":
+    all_runs = st.session_state.get("all_runs")
+    run_labels_state = st.session_state.get("run_labels")
+    if all_runs and run_labels_state and len(all_runs) >= 2:
+        runs = all_runs
+        run_labels_list = run_labels_state
+    else:
+        runB = st.session_state.get("runB")
+        runs = [runA] if runB is None else [runA, runB]
+        run_labels_list = ["A"] if len(runs) == 1 else ["A", "B"]
+else:
+    runs = [runA]
+    run_labels_list = ["A"]
+
+
+def list_parquets_in_run(run_path) -> List[str]:
+    """List ``*.parquet`` files directly in ``run_path`` as sorted absolute paths ([] if not a directory)."""
+    run_dir = Path(run_path)
+    found = run_dir.glob("*.parquet") if run_dir.is_dir() else ()
+    return sorted(str(entry.resolve()) for entry in found)
+
+
+parquet_lists = [list_parquets_in_run(r["path"]) for r in runs]
+for i, (r, pl) in enumerate(zip(runs, parquet_lists)):
+    if not pl:
+        lbl = run_labels_list[i] if i < len(run_labels_list) else str(i)
+        st.error(
+            f"No parquet files in run ({lbl}): {path_display(r['path'])}. "
+            "Add a .parquet file or generate one from the Download page."
+        )
+        st.stop()
+
+multi_run = len(runs) >= 2
+
+_ld_entries = []
+for i, r in enumerate(runs):
+    lbl = run_labels_list[i] if i < len(run_labels_list) else str(i)
+    if lbl == "A":
+        _ltitle = "Baseline · A"
+    else:
+        _ltitle = f"Candidate · {lbl}"
+    _ld_entries.append((_ltitle, path_display(r["path"])))
+render_loaded_data_section(_ld_entries)
+render_page_hero(
+    kicker="T4 visualizer",
+    title="T4 3D bounding box viewer",
+    description=(
+        "Embedded **Three.js** view with GT, prediction (EST), and UUID-matched pairs from parquet (**postMessage**). "
+        "Scrub **time inside the viewer** (bottom slider); eval boxes follow that frame. Same filters as the BEV page."
+    ),
+    mode=mode,
+)
+
+# ----------------------------
+# Sidebar (Filters) — shared keys with Bounding Box Viewer
+# ----------------------------
+with st.sidebar:
+    st.markdown("##### Filters")
+    st.caption("Same scene / topic / labels as the BEV viewer. Frame / playback: use the **3D viewer** controls.")
+
+    if multi_run:
+        runs_to_show = st.multiselect(
+            "Runs to show",
+            run_labels_list,
+            default=run_labels_list,
+            key="bbox_viewer_runs_to_show",
+        )
+        if not runs_to_show:
+            st.warning("Select at least one run.")
+            st.stop()
+    else:
+        runs_to_show = run_labels_list
+
+    selected_files = {}
+    for i, lbl in enumerate(run_labels_list):
+        if lbl not in runs_to_show:
+            continue
+        pl = parquet_lists[i]
+        if len(pl) == 1:
+            selected_files[lbl] = pl[0]
+        else:
+            selected_files[lbl] = st.selectbox(
+                f"File (Run {lbl})",
+                pl,
+                format_func=os.path.basename,
+                key=f"bbox_viewer_file_{lbl}",
+            )
+
+    first_shown = runs_to_show[0] if runs_to_show else run_labels_list[0]
+    filter_file = selected_files.get(first_shown) or parquet_lists[run_labels_list.index(first_shown)][0]
+
+con = duckdb.connect()
+
+cols = con.execute("DESCRIBE SELECT * FROM parquet_scan(?)", [filter_file]).df()["column_name"].tolist()
+has_visibility = "visibility" in cols
+has_suite_name = "suite_name" in cols
+has_scenario_name = "scenario_name" in cols
+has_t4dataset_name = "t4dataset_name" in cols
+hover_extra_cols = [c for c in ["z", "height", "vx", "vy", "confidence", "pointcloud_num"] if c in cols]
+
+scene_where = "1=1"
+scene_params: List[str] = [filter_file]
+
+if has_suite_name:
+    suite_list = con.execute(
+        "SELECT DISTINCT suite_name AS v FROM parquet_scan(?) WHERE suite_name IS NOT NULL ORDER BY v",
+        [filter_file],
+    ).df()["v"].dropna().astype(str).tolist()
+else:
+    suite_list = []
+
+if "bbox_viewer_link_suite" in st.session_state:
+    _lsu = st.session_state.pop("bbox_viewer_link_suite", None)
+    if suite_list and _lsu is not None and str(_lsu) in suite_list:
+        st.session_state["bbox_viewer_suite"] = str(_lsu)
+
+with st.sidebar:
+    selected_suite = None
+    selected_scenario = None
+    if suite_list:
+        selected_suite = st.selectbox(
+            "Suite name",
+            suite_list,
+            key="bbox_viewer_suite",
+        )
+    if has_scenario_name:
+        if selected_suite is not None:
+            scenario_list = con.execute(
+                "SELECT DISTINCT scenario_name AS v FROM parquet_scan(?) WHERE suite_name = ? AND scenario_name IS NOT NULL ORDER BY v",
+                [filter_file, selected_suite],
+            ).df()["v"].dropna().astype(str).tolist()
+        else:
+            scenario_list = con.execute(
+                "SELECT DISTINCT scenario_name AS v FROM parquet_scan(?) WHERE scenario_name IS NOT NULL ORDER BY v",
+                [filter_file],
+            ).df()["v"].dropna().astype(str).tolist()
+        if scenario_list:
+            if "bbox_viewer_link_scenario" in st.session_state:
+                _lsc = st.session_state.pop("bbox_viewer_link_scenario", None)
+                if _lsc is not None and str(_lsc) in scenario_list:
+                    st.session_state["bbox_viewer_scenario"] = str(_lsc)
+            selected_scenario = st.selectbox(
+                "Scenario name",
+                scenario_list,
+                key="bbox_viewer_scenario",
+            )
+    t4dataset_list: List[str] = []
+    if has_t4dataset_name:
+        t4_where_parts = ["t4dataset_name IS NOT NULL"]
+        t4_params: List[Any] = [filter_file]
+        if selected_suite is not None:
+            t4_where_parts.insert(0, "suite_name = ?")
+            t4_params.append(selected_suite)
+        if selected_scenario is not None:
+            t4_where_parts.insert(0, "scenario_name = ?")
+            t4_params.insert(1, selected_scenario)
+        t4_where = " AND ".join(t4_where_parts)
+        t4dataset_list = con.execute(
+            f"SELECT DISTINCT t4dataset_name AS v FROM parquet_scan(?) WHERE {t4_where} ORDER BY v",
+            t4_params,
+        ).df()["v"].dropna().astype(str).tolist()
+    has_multiple_t4dataset = len(t4dataset_list) > 1
+    selected_t4dataset = None
+    if has_multiple_t4dataset and t4dataset_list:
+        if "bbox_viewer_link_t4dataset" in st.session_state:
+            _lt4 = st.session_state.pop("bbox_viewer_link_t4dataset", None)
+            if _lt4 is not None and str(_lt4) in t4dataset_list:
+                st.session_state["bbox_viewer_t4dataset"] = str(_lt4)
+        selected_t4dataset = st.selectbox(
+            "t4dataset_name",
+            t4dataset_list,
+            key="bbox_viewer_t4dataset",
+        )
+
+if selected_suite is not None:
+    scene_where = "suite_name = ?"
+    scene_params = [filter_file, selected_suite]
+if selected_scenario is not None:
+    scene_where = scene_where + " AND scenario_name = ?" if scene_where != "1=1" else "scenario_name = ?"
+    scene_params = scene_params + [selected_scenario]
+if selected_t4dataset is not None:
+    scene_where = scene_where + " AND t4dataset_name = ?" if scene_where != "1=1" else "t4dataset_name = ?"
+    scene_params = scene_params + [selected_t4dataset]
+if scene_where == "1=1":
+    scene_params = [filter_file]
+
+topic_names = con.execute(
+    f"SELECT DISTINCT topic_name AS v FROM parquet_scan(?) WHERE {scene_where} ORDER BY v",
+    scene_params,
+).df()["v"].dropna().tolist()
+if not topic_names:
+    for key in (
+        "bbox_viewer_scenario",
+        "bbox_viewer_suite",
+        "bbox_viewer_link_suite",
+        "bbox_viewer_link_scenario",
+        "bbox_viewer_link_t4dataset",
+    ):
+        if key in st.session_state:
+            del st.session_state[key]
+    st.warning(
+        "No topic_name for the selected scene (from Detection Stats link). "
+        "Cleared scene selection; please choose a scene from the sidebar."
+    )
+    st.rerun()
+
+with st.sidebar:
+    selected_topic = st.selectbox("topic_name (single)", topic_names)
+
+labels = con.execute(
+    f"SELECT DISTINCT label AS v FROM parquet_scan(?) WHERE {scene_where} AND topic_name=? ORDER BY v",
+    scene_params + [selected_topic],
+).df()["v"].dropna().tolist()
+if not labels:
+    st.warning("No label for selected topic.")
+    st.stop()
+
+with st.sidebar:
+    selected_labels = st.multiselect("label(s)", labels, default=labels)
+
+selected_visibility = None
+if has_visibility:
+    vis_list = con.execute(
+        f"SELECT DISTINCT COALESCE(visibility,'UNKNOWN') AS v FROM parquet_scan(?) WHERE {scene_where} AND topic_name=? ORDER BY v",
+        scene_params + [selected_topic],
+    ).df()["v"].tolist()
+    with st.sidebar:
+        if vis_list:
+            selected_visibility = st.multiselect("visibility", vis_list, default=vis_list)
+        else:
+            st.info("No visibility values found — skipping.")
+else:
+    with st.sidebar:
+        st.info("No 'visibility' column found — skipping visibility filter.")
+
+if not selected_labels:
+    st.warning("No label selected.")
+    st.stop()
+
+with st.sidebar:
+    st.markdown("##### T4 server")
+    st.caption("**GET /datasets/{id}/availability** must succeed before the iframe loads.")
+    if "bbox_t4_base_url" not in st.session_state:
+        st.session_state["bbox_t4_base_url"] = (
+            (os.environ.get(ENV_BASE_URL) or DEFAULT_BASE_URL).strip() or DEFAULT_BASE_URL
+        )
+    st.text_input(
+        "T4 server base URL",
+        key="bbox_t4_base_url",
+        help=f"Default from env `{ENV_BASE_URL}`. Embeds `/viewer/three` and posts GT / pred / matched bbox layers.",
+    )
+
+# ----------------------------
+# Load data (same WHERE filters as Bounding Box Viewer; SELECT adds optional renderer columns)
+# ----------------------------
+where = [scene_where, "topic_name = ?"]
+params = scene_params + [selected_topic]
+where.append(f"label IN ({','.join(['?']*len(selected_labels))})")
+params.extend(selected_labels)
+
+if has_visibility and selected_visibility:
+    where.append(f"COALESCE(visibility,'UNKNOWN') IN ({','.join(['?']*len(selected_visibility))})")
+    params.extend(selected_visibility)
+
+_renderer_optional_cols = [
+    "unix_time",
+    "frame_id",
+    "z",
+    "height",
+    "shape_type",
+    "vx",
+    "vy",
+    "confidence",
+    "pointcloud_num",
+    "visibility",
+    "x_error",
+    "y_error",
+    "z_error",
+    "yaw_error",
+    "vx_error",
+    "vy_error",
+    "speed_error",
+    "center_distance",
+    "plane_distance",
+    "pair_dt_sec",
+    "pair_uuid",
+    "dx_min",
+    "dy_min",
+    "t4dataset_id",
+    "suite_name",
+    "t4dataset_name",
+    "scenario_name",
+]
+_select_cols = [
+    "frame_index",
+    "x",
+    "y",
+    "length",
+    "width",
+    "yaw",
+    "label",
+    "topic_name",
+    "source",
+    "status",
+    "uuid",
+]
+_select_cols.extend(c for c in _renderer_optional_cols if c in cols and c not in _select_cols)
+sql = f"""
+SELECT {", ".join(_select_cols)}
+FROM parquet_scan(?)
+WHERE {" AND ".join(where)}
+ORDER BY frame_index
+"""
+
+files_to_load: List[tuple] = [(selected_files[lbl], lbl) for lbl in runs_to_show if lbl in selected_files]
+# ``params`` (built with the WHERE clause above) starts with the filter file's path; drop that slot so
+# each run's own parquet path can be prepended per query while the filter values stay in one place.
+base_params = list(params[1:])
+
+dfs = []
+for file_path, run_label in files_to_load:
+    qparams = [file_path] + base_params
+    df_part = con.execute(sql, qparams).df()
+    if not df_part.empty:
+        df_part = df_part.copy()
+        df_part["run"] = run_label
+        dfs.append(df_part)
+
+if not dfs:
+    st.warning("No data matches the selected filters.")
+    st.stop()
+
+df = pd.concat(dfs, ignore_index=True)
+if len(files_to_load) == 1:
+    df["run"] = df["run"].iloc[0]
+
+if "frame_index" in df.columns and not np.issubdtype(df["frame_index"].dtype, np.integer):
+    df["frame_index"] = (
+        pd.to_numeric(df["frame_index"], errors="coerce").fillna(0).astype(int)
+    )
+
+if len(files_to_load) == 1:
+    st.info(f"**Currently showing:** Run {files_to_load[0][1]} only")
+else:
+    run_names = [f[1] for f in files_to_load]
+    st.info(f"**Currently showing:** Runs {', '.join(run_names)} — 3D layers include boxes from all selected runs.")
+
+f_min, f_max = int(df.frame_index.min()), int(df.frame_index.max())
+
+# One reference slice for resolving t4dataset_id / scenario_name (same as iframe entry frame).
+_ref_frame = f_min
+df_frame = df[df.frame_index == _ref_frame]
+if df_frame.empty and not df.empty:
+    df_frame = df.iloc[:1].copy()
+
+# ----------------------------
+# T4 Three.js embed
+# ----------------------------
+base_url_t4 = (st.session_state.get("bbox_t4_base_url") or "").strip() or DEFAULT_BASE_URL
+
+_ds_t4 = resolve_t4_dataset_id(df_frame)
+if not _ds_t4 and selected_t4dataset is not None:
+    _ds_t4 = str(selected_t4dataset)
+_sc_t4 = resolve_t4_scenario(df_frame, selected_scenario)
+
+if not _ds_t4:
+    for _k in (
+        "bbox_t4_last_images",
+        "bbox_t4_last_meta",
+        "bbox_t4_success_key",
+        "bbox_t4_error_key",
+        "bbox_t4_error_msg",
+        "bbox_t4_availability",
+    ):
+        st.session_state.pop(_k, None)
+    st.warning(
+        "Cannot resolve a T4 dataset id for this frame. Needs parquet **t4dataset_id** or **t4dataset_name**, "
+        f"or **t4dataset_name** in the sidebar when multiple datasets exist. Set **T4 server base URL** or `{ENV_BASE_URL}`."
+    )
+else:
+    _t4_avail_cache_key = f"{base_url_t4.rstrip('/')}|{_ds_t4}"
+    _cached_av = st.session_state.get("bbox_t4_availability")
+    _need_avail_fetch = _cached_av is None or _cached_av.get("cache_key") != _t4_avail_cache_key
+    if _need_avail_fetch:
+        try:
+            with st.spinner("Checking T4 dataset on the server…"):
+                _av_client = T4VisualizerClient(base_url=base_url_t4, timeout=2.0)
+                _av_data = _av_client.dataset_availability(_ds_t4)
+            st.session_state["bbox_t4_availability"] = {
+                "cache_key": _t4_avail_cache_key,
+                "ok": True,
+                "available": bool(_av_data.get("available")),
+                "data": _av_data,
+                "error": None,
+            }
+        except T4VisualizerError as ex:
+            st.session_state["bbox_t4_availability"] = {
+                "cache_key": _t4_avail_cache_key,
+                "ok": False,
+                "available": False,
+                "data": None,
+                "error": f"T4 server error ({ex.status_code}): {ex}",
+            }
+        except (OSError, requests.RequestException) as ex:
+            st.session_state["bbox_t4_availability"] = {
+                "cache_key": _t4_avail_cache_key,
+                "ok": False,
+                "available": False,
+                "data": None,
+                "error": f"Network error: {ex}",
+            }
+        except Exception as ex:
+            st.session_state["bbox_t4_availability"] = {
+                "cache_key": _t4_avail_cache_key,
+                "ok": False,
+                "available": False,
+                "data": None,
+                "error": f"Availability check failed: {ex}",
+            }
+
+    _av = st.session_state.get("bbox_t4_availability") or {}
+
+    if not _av.get("ok"):
+        st.error("Could not verify the dataset on the T4 visualizer server.")
+        with st.expander("Details", expanded=False):
+            st.markdown(_av.get("error") or "Unknown error.")
+    elif not _av.get("available"):
+        st.warning("This dataset is not available on the visualizer server host.")
+        with st.expander("Details", expanded=False):
+            _d = _av.get("data")
+            if isinstance(_d, dict) and _d:
+                st.json(_d)
+            else:
+                st.markdown(
+                    "The server reported **available: false** (no local dataset path for this id on the machine "
+                    "running `t4-server`)."
+                )
+    else:
+        # Fixed entry frame so Streamlit slider does not reload the iframe; eval layers use bbox_layers_by_frame.
+        _iframe_entry_frame = int(df["frame_index"].min())
+        _q_three = t4_share_query_params(_ds_t4, _sc_t4, _iframe_entry_frame)
+        _q_three = f"{_q_three}&{infer_external_bbox_alignment_query_params(df)}"
+        _viewer_three_url = f"{base_url_t4.rstrip('/')}/viewer/three?{_q_three}"
+        _layer_payload = build_three_layer_payload_all_frames(df)
+
+        _viewer_three_h = 1400
+        render_t4_three_js_embed(_viewer_three_url, _layer_payload, height=_viewer_three_h)
+
+st.page_link("pages/4_Bounding_Box_Viewer.py", label="Back to Bounding Box & BEV viewer", icon="🖼️")
diff --git a/evaluation_dashboard_app/pages/5_Tools.py b/evaluation_dashboard_app/pages/5_Tools.py
deleted file mode 100644
index 0dc8958..0000000
--- a/evaluation_dashboard_app/pages/5_Tools.py
+++ /dev/null
@@ -1,118 +0,0 @@
-import streamlit as st
-import re
-import subprocess
-
-from lib.page_chrome import inject_app_page_styles, render_page_hero
-
-st.set_page_config(
-    page_title="lsim_analysis_tool runner",
-    page_icon="⚙️",
-    layout="centered",
-)
-inject_app_page_styles()
-render_page_hero(
-    kicker="CLI bridge",
-    title="lsim_analysis_tool runner",
-    description=(
-        "Paste Autoware Evaluator report or suite URLs, generate shell snippets, and run analysis commands "
-        "from a simple form."
-    ),
-    mode="Single Run",
-)
-
-# Constants and regexes
-JOB_RE = re.compile(r"/reports/([0-9a-fA-F-]{36})")
-SUITE_RE = re.compile(r"/suites/([0-9a-fA-F-]{36})")
-DEFAULT_REPORT_URL = (
-    "https://evaluation.tier4.jp/evaluation/reports/"
-    "71b8eec9-7e28-5f9c-9b89-8e88545e742f?project_id=x2_dev"
-)
-DEFAULT_SUITE_URL = (
-    "https://evaluation.tier4.jp/evaluation/suites/"
-    "1af11feb-362d-4c48-b258-02cd433a3866?project_id=x2_dev"
-)
-DEFAULT_OUTPUT = "~/data/x2gen2/evaluator_summary/NO_shorten_left_lower_gpu2_No3/"
-
-def extract_job_id(report_url):
-    m = JOB_RE.search(report_url or "")
-    return m.group(1) if m else ""
-
-def extract_suite_id(suite_url):
-    m = SUITE_RE.search(suite_url or "")
-    return m.group(1) if m else ""
-
-# App state initialization
-if 'report_url' not in st.session_state:
-    st.session_state['report_url'] = DEFAULT_REPORT_URL
-if 'suite_url' not in st.session_state:
-    st.session_state['suite_url'] = DEFAULT_SUITE_URL
-
-# Layout inputs
-with st.form(key="eval_runner_form"):
-    col1, col2 = st.columns([1, 1])
-    with col1:
-        project_id = st.text_input("Project ID", value="x2_dev", key="project_id")
-        setup_bash = st.text_area(
-            "setup.bash path",
-            value="/home/leigu/pilot-auto.x2.v4.3/install/setup.bash",
-            key="setup_bash",
-            height=120,
-            placeholder="Enter full path(s) to your setup.bash file(s), one per line."
-        )
-        output_dir = st.text_area(
-            "Output Directory",
-            value=DEFAULT_OUTPUT,
-            key="output_dir",
-            height=120,
-            placeholder="Enter one or more output directories, one per line."
-        )
-    with col2:
-        report_url = st.text_area(
-            "Report URL", 
-            value=st.session_state['report_url'], 
-            key="report_url",
-            height=120,
-            placeholder="Paste the full Evaluation Report URL here."
-        )
-        suite_url = st.text_area(
-            "Suite URL", 
-            value=st.session_state['suite_url'], 
-            key="suite_url",
-            height=120,
-            placeholder="Paste the full Evaluation Suite URL here."
-        )
-
-        # Job ID and Suite ID auto-extracted from URL text fields live as you type
-        # So always extract from form inputs (not session state nor callbacks)
-        job_id = extract_job_id(report_url)
-        suite_id = extract_suite_id(suite_url)
-
-        st.text_input("Job ID", value=job_id, key="job_id", disabled=True)
-        st.text_input("Suite ID", value=suite_id, key="suite_id", disabled=True)
-
-    # Build command
-    cmd = (
-        f"./perception_evaluation_result_creator2.sh "
-        f"{setup_bash} "
-        f"./perception_eval_result_summarizer.py "
-        f"{project_id} "
-        f"{job_id} "
-        f"{suite_id} "
-        f"{output_dir}"
-    )
-
-    # Submit button as required for Streamlit forms
-    submitted = st.form_submit_button("Run in Terminal")
-
-# "Run in Terminal" logic
-if submitted:
-    st.info(f"Command to run (copy below and paste into your terminal):\n\n{cmd}")
-
-
-st.markdown("""
----
-**Instructions:**
-- Enter your parameters above.
-- Job ID / Suite ID are automatically parsed when you enter the Evaluation URLs.
-- Click **Run in Terminal** to show the command for copy-paste.
-""")
\ No newline at end of file
diff --git a/evaluation_dashboard_app/pages/6_Download.py b/evaluation_dashboard_app/pages/6_Download.py
index 83bae3b..2222722 100644
--- a/evaluation_dashboard_app/pages/6_Download.py
+++ b/evaluation_dashboard_app/pages/6_Download.py
@@ -39,10 +39,11 @@ def _to_jst(dt: Any) -> Optional[datetime]:
 from lib.user_config import UserConfig
 from lib.path_utils import get_data_root, resolve_under_data_root, to_data_relative
 from lib.eval_summary import find_eval_result_dirs, run_eval_result_for_dir, generate_summary_and_score_csv
-from lib.page_chrome import inject_app_page_styles
+from lib.page_chrome import (
+    inject_app_page_styles,
+)
 from lib.ui.download_ui import (
     ImpressiveProgressHUD,
-    TaskCardMode,
     render_detailed_scenario_download_panel,
     render_download_hero,
     render_download_status_table_intro,
@@ -51,11 +52,11 @@ def _to_jst(dt: Any) -> Optional[datetime]:
     render_job_json_summary_panel,
     render_recent_scenario_downloads_intro,
     render_scenario_download_summary_panel,
-    render_task_list_empty_state,
-    task_list_card_markup,
 )
+from lib.ui.task_history import get_task_list_current_user, render_task_list
 from lib.ui.styles_download import inject_download_page_styles
 from lib.db import (
+    count_recent_tasks,
     create_task,
     delete_task,
     get_task,
@@ -64,6 +65,7 @@ def _to_jst(dt: Any) -> Optional[datetime]:
     update_task_rq_job_id,
 )
 from lib import download_core
+from lib import evaluator_api
 from lib.auth import get_current_user_id, is_auth_enabled
 
 try:
@@ -75,6 +77,12 @@ def _to_jst(dt: Any) -> Optional[datetime]:
 # Task queue panel: time window + row cap (must match header + list_recent_tasks)
 _TASK_LIST_SINCE_DAYS = 7
 _TASK_LIST_MAX_ROWS = 200
+_TASK_HISTORY_RANGE_OPTIONS = {
+    "7 days": 7,
+    "30 days": 30,
+    "90 days": 90,
+    "All": None,
+}
 
 def _parse_rq_timeout_sec(raw: Optional[str], *, default: int, minimum: int) -> int:
     if raw is None or not str(raw).strip():
@@ -103,6 +111,71 @@ def _parse_rq_timeout_sec(raw: Optional[str], *, default: int, minimum: int) ->
 else:
     _BUILD_PARQUET_JOB_TIMEOUT_SEC = _RQ_DEFAULT_JOB_TIMEOUT_SEC
 
+_DEFAULT_EVAL_WORKERS = 4
+
+
+def _default_eval_workers() -> int:
+    try:
+        workers = int(os.environ.get("EVAL_WORKERS_DEFAULT", _DEFAULT_EVAL_WORKERS))
+    except (TypeError, ValueError):
+        workers = _DEFAULT_EVAL_WORKERS
+    return max(1, min(workers, 16))
+
+
+_APP_ROOT = Path(__file__).resolve().parents[1]
+_CATALOGS_FILENAME = "catalogs.json"
+_LEGACY_CATALOGS_PATH = Path("/home/leigu/EvaluatorRunnerUITest/catalogs.json")
+
+
+def _catalog_preset_candidate_paths() -> List[Path]:
+    """Return catalog preset paths in priority order."""
+    paths: List[Path] = []
+    env_path = os.environ.get("EVAL_CATALOGS_PATH")
+    if env_path:
+        paths.append(Path(env_path).expanduser())
+
+    paths.extend(
+        [
+            _APP_ROOT / _CATALOGS_FILENAME,
+            Path.cwd() / _CATALOGS_FILENAME,
+            _LEGACY_CATALOGS_PATH,
+        ]
+    )
+
+    unique_paths: List[Path] = []
+    seen = set()
+    for path in paths:
+        key = os.fspath(path)
+        if key not in seen:
+            unique_paths.append(path)
+            seen.add(key)
+    return unique_paths
+
+
+def _load_catalog_presets() -> tuple[List[Dict[str, Any]], Optional[Path], Optional[str]]:
+    """Load evaluator catalog presets from the first available catalogs.json."""
+    required_keys = {"display_name", "catalog_id", "integration_id"}
+    for path in _catalog_preset_candidate_paths():
+        if not path.is_file():
+            continue
+
+        try:
+            with path.open("r", encoding="utf-8") as f:
+                presets = json.load(f)
+            if not isinstance(presets, list):
+                raise ValueError("catalog preset file must contain a JSON list")
+
+            valid_presets = [
+                preset
+                for preset in presets
+                if isinstance(preset, dict) and required_keys.issubset(preset)
+            ]
+            return valid_presets, path, None
+        except Exception as exc:
+            return [], path, str(exc)
+
+    return [], None, None
+
 
 def _enqueue_task(
     task_type: str,
@@ -775,345 +848,79 @@ def download_scenarios(
 
 
 
-def _task_type_label(task_type: str) -> str:
-    """Human-readable label for task type."""
-    labels = {
-        "download_results": "Download results",
-        "download_scenarios": "Download scenarios",
-        "run_eval_dirs": "Run eval dirs",
-        "generate_summary_csv": "Generate summary CSV",
-        "build_parquet": "Build parquet",
-    }
-    return labels.get(task_type, task_type or "Task")
-
-
-def _task_summary(t: Dict[str, Any]) -> str:
-    """One-line summary from task parameters (job_id, output_path, etc.)."""
-    params = t.get("parameters") or {}
-    task_type = t.get("type", "")
-    if task_type == "download_results":
-        out = params.get("output_path") or params.get("job_id") or ""
-        return f"job_id={params.get('job_id', '')} → {out}"
-    if task_type == "download_scenarios":
-        out = params.get("output_dir") or params.get("output_path") or ""
-        return f"job_id={params.get('job_id', '')} → {out}"
-    if task_type in ("run_eval_dirs", "generate_summary_csv"):
-        return params.get("eval_root", "")
-    if task_type == "build_parquet":
-        return params.get("pkl_dir", "")
-    return ""
-
-
-def _task_time_str(t: Dict[str, Any]) -> str:
-    """Format task created_at for display in JST (e.g. 'Feb 24, 16:45')."""
-    created = t.get("created_at")
-    dt = _to_jst(created) if created else None
-    if not dt:
-        return "—"
-    try:
-        return dt.strftime("%b %d, %H:%M")
-    except Exception:
-        return str(created)[:16] if created else "—"
-
-
-def _task_duration(t: Dict[str, Any]) -> Optional[str]:
-    """Format duration from created_at to updated_at if both exist."""
-    created = t.get("created_at")
-    updated = t.get("updated_at")
-    if not created or not updated:
-        return None
-    try:
-        start = created.timestamp() if hasattr(created, "timestamp") else None
-        end = updated.timestamp() if hasattr(updated, "timestamp") else None
-        if start is None or end is None:
-            return None
-        secs = int(end - start)
-        if secs < 60:
-            return f"{secs}s"
-        if secs < 3600:
-            return f"{secs // 60}m {secs % 60}s"
-        return f"{secs // 3600}h {(secs % 3600) // 60}m"
-    except Exception:
-        return None
-
-
-def _render_summary_table(rows: Optional[List[Dict[str, Any]]]) -> None:
-    """Render a summary table from rows (e.g. Scenario Name, Scenario ID, Status) when present."""
-    if not rows:
-        return
-    try:
-        df = pd.DataFrame(rows)
-        st.subheader("Download Status")
-        st.dataframe(df, width="stretch")
-    except Exception:
-        pass
-
-
-def _render_result_summary(summary: Dict[str, Any]) -> None:
-    """Render a result summary block (like local mode) from task result_summary JSON."""
-    job = summary.get("job", "")
-    if job == "download_results":
-        total = summary.get("total", 0)
-        success = summary.get("success", 0)
-        failed = summary.get("failed", 0)
-        out = summary.get("output_path", "")
-        st.subheader("Summary")
-        st.write(f"- Total scenarios processed: **{total}**")
-        st.write(f"- Successfully downloaded: **{success}**")
-        if failed:
-            st.write(f"- Failed: **{failed}**")
-        st.write(f"- Output directory: `{out}`")
-        if success > 0:
-            st.info("To generate the final summary CSV files, go to the **Eval Results** tab and run the evaluation.")
-        _render_summary_table(summary.get("rows"))
-    elif job == "download_scenarios":
-        total = summary.get("total", 0)
-        success = summary.get("success", 0)
-        failed = summary.get("failed", 0)
-        out = summary.get("output_path", "")
-        st.subheader("Summary")
-        st.write(f"- Total scenarios: **{total}**")
-        st.write(f"- Successfully downloaded: **{success}**")
-        if failed:
-            st.write(f"- Failed: **{failed}**")
-        st.write(f"- Result JSON files: **{total}** downloaded.")
-        st.write(f"- Output directory: `{out}`")
-        if success > 0:
-            st.info("To generate summary CSV files, go to the **Eval Results** tab and run the evaluation.")
-        _render_summary_table(summary.get("rows"))
-    elif job == "run_eval_dirs":
-        dirs = summary.get("directories_processed", 0)
-        path = summary.get("summary_path", "")
-        srows = summary.get("summary_rows", 0)
-        scrows = summary.get("score_rows", 0)
-        st.subheader("Eval Summary")
-        st.write(f"- Directories processed: **{dirs}**")
-        st.write(f"- Generated Summary.csv (**{srows}** rows) and Score.csv (**{scrows}** rows) in `{path}`")
-    elif job == "generate_summary_csv":
-        path = summary.get("summary_path", "")
-        srows = summary.get("summary_rows", 0)
-        scrows = summary.get("score_rows", 0)
-        st.subheader("Summary")
-        st.write(f"- Generated Summary.csv (**{srows}** rows) and Score.csv (**{scrows}** rows) in `{path}`")
-    elif job == "build_parquet":
-        path = summary.get("output_path", "")
-        st.subheader("Summary")
-        st.write(f"- Output: `{path}`")
-    else:
-        st.json(summary)
-
-
-def _render_task_detail_content(t: Dict[str, Any]) -> None:
-    """Render full task detail (summary, path, error, log, params) into current container."""
-    try:
-        _render_task_detail_content_impl(t)
-    except Exception as e:
-        st.error(f"Could not load task details: {e}")
-        import traceback
-        st.code(traceback.format_exc(), language=None)
-
-
-def _render_task_detail_content_impl(t: Dict[str, Any]) -> None:
-    """Implementation of task detail rendering (called inside try/except)."""
-    status = t.get("status", "")
-    created_jst = _to_jst(t.get("created_at"))
-    updated_jst = _to_jst(t.get("updated_at"))
-    time_parts = []
-    if created_jst:
-        try:
-            time_parts.append(f"Created: {created_jst.strftime('%Y-%m-%d %H:%M:%S')} JST")
-        except Exception:
-            time_parts.append(f"Created: {t.get('created_at')}")
-    if updated_jst and updated_jst != created_jst:
-        try:
-            time_parts.append(f"Updated: {updated_jst.strftime('%Y-%m-%d %H:%M:%S')} JST")
-        except Exception:
-            time_parts.append(f"Updated: {t.get('updated_at')}")
-    if time_parts:
-        st.caption(" · ".join(time_parts))
-    result_summary_raw = t.get("result_summary")
-    if result_summary_raw:
-        try:
-            result_summary = json.loads(result_summary_raw) if isinstance(result_summary_raw, str) else result_summary_raw
-            _render_result_summary(result_summary)
-            st.markdown("---")
-        except (TypeError, ValueError):
-            pass
-    if t.get("result_path"):
-        st.text_input("Result path", value=t["result_path"], key=f"rp_modal_{str(t.get('id'))}", disabled=True, label_visibility="collapsed")
-    if status == "failed" and t.get("error_message"):
-        st.error(t.get("error_message"))
-    log_output = (t.get("log_output") or "").strip()
-    if log_output:
-        st.caption("Log output")
-        st.code(log_output, language=None)
-    params = t.get("parameters") or {}
-    if params:
-        st.caption("Parameters")
-        st.json(params)
-
-
-def _open_task_detail(task_id: str) -> None:
-    st.session_state["_task_detail_id"] = str(task_id)
-
-
-def _render_one_task_row(
-    t: Dict[str, Any],
-    current_user: Optional[str],
-    use_dialog: bool,
-    *,
-    mode: TaskCardMode,
-) -> None:
-    """One task: compact card + View/Delete (and inline More when no dialog)."""
-    task_id = t.get("id", "")
-    task_type = t.get("type", "")
-    status = t.get("status", "")
-    status_labels = {"pending": "Pending", "running": "Running", "completed": "Completed", "failed": "Failed"}
-    status_label = status_labels.get(status, status)
-    type_label = _task_type_label(task_type)
-    summary = _task_summary(t)
-    duration = _task_duration(t) or "—"
-    time_str = _task_time_str(t)
-    sid = str(task_id)
-    if mode == "history":
-        summary_short = (summary[:72] + "…") if summary and len(summary) > 72 else (summary or "—")
-    else:
-        summary_short = "—"
-    progress_msg = (t.get("progress_message") or "").strip()
-    _card = task_list_card_markup(
-        task_id=sid,
-        type_label=type_label,
-        status=status,
-        status_label=status_label,
-        time_str=time_str,
-        duration=duration,
-        summary_short=summary_short,
-        progress_pct=t.get("progress_pct"),
-        progress_message=progress_msg,
-        mode=mode,
-    )
-    st.markdown(f'<div class="dl-task-stack">{_card}</div>', unsafe_allow_html=True)
-
-    if use_dialog:
-        bv, bd, _sp = st.columns([1.15, 1.15, 4])
-        with bv:
-            st.button("View", key=f"view_{sid}", on_click=_open_task_detail, args=(sid,))
-        with bd:
-            _stop_lbl = "Stop" if status in ("pending", "running") else "Remove"
-            _stop_help = (
-                "Cancels the Redis/RQ job when possible, then removes this row from the list."
-                if status in ("pending", "running")
-                else "Remove this row from the task list."
-            )
-            if st.button(
-                _stop_lbl,
-                key=f"del_{sid}",
-                type="secondary",
-                help=_stop_help,
-            ):
-                delete_task(sid, session_id=current_user)
-                st.rerun()
-    else:
-        bd, _sp = st.columns([1.15, 4])
-        with bd:
-            _stop_lbl = "Stop" if status in ("pending", "running") else "Remove"
-            _stop_help = (
-                "Cancels the Redis/RQ job when possible, then removes this row from the list."
-                if status in ("pending", "running")
-                else "Remove this row from the task list."
-            )
-            if st.button(
-                _stop_lbl,
-                key=f"del_{sid}",
-                type="secondary",
-                help=_stop_help,
-            ):
-                delete_task(sid, session_id=current_user)
-                st.rerun()
-
-    if not use_dialog:
-        with st.expander("More", expanded=False):
-            _render_task_detail_content(t)
-
-
-def _render_task_list(tasks: List[Dict[str, Any]], current_user: Optional[str]) -> bool:
-    """Active tasks visible; completed/failed in a collapsed expander. True if any active."""
-    if current_user:
-        st.caption(f"Logged in as **{current_user}** · your recent tasks only")
-    if not tasks:
-        render_task_list_empty_state()
-        return False
-
-    active = [t for t in tasks if t.get("status") in ("pending", "running")]
-    history = [t for t in tasks if t.get("status") not in ("pending", "running")]
-    use_dialog = callable(getattr(st, "dialog", None))
-
-    for t in active:
-        _render_one_task_row(t, current_user, use_dialog, mode="active_compact")
-
-    if not active:
-        st.caption("No queued or running jobs.")
-
-    if history:
-        with st.expander(f"Task history ({len(history)})", expanded=False):
-            for t in history:
-                _render_one_task_row(t, current_user, use_dialog, mode="history")
-
-    # Modal for task detail when dialog is available
-    if use_dialog and st.session_state.get("_task_detail_id"):
-        _task_id = st.session_state["_task_detail_id"]
-        try:
-            detail_task = next((x for x in tasks if str(x.get("id")) == _task_id), None)
-            if detail_task is None:
-                detail_task = get_task(_task_id)
-            if detail_task:
-
-                @st.dialog("Task details", width="large")
-                def _task_detail_modal():
-                    _render_task_detail_content(detail_task)
-                    if st.button("Close"):
-                        st.session_state.pop("_task_detail_id", None)
-                        st.rerun()
-
-                _task_detail_modal()
-        except Exception as e:
-            st.error(f"Could not open task details: {e}")
-        finally:
-            # Clear so X/outside click or error doesn't leave page stuck; next run shows main content
-            st.session_state.pop("_task_detail_id", None)
-
-    return len(active) > 0
-
-
 # Task queue status (production deployment); per-user when auth is enabled
 _current_user = None
 if is_task_queue_enabled():
-    _current_user = get_current_user_id() if is_auth_enabled() else None
+    _current_user = get_task_list_current_user()
     render_download_task_section_header(
         since_days=_TASK_LIST_SINCE_DAYS,
         max_rows=_TASK_LIST_MAX_ROWS,
     )
+    if "download_task_history_range" not in st.session_state:
+        st.session_state["download_task_history_range"] = "7 days"
+    if "download_task_history_page_size" not in st.session_state:
+        st.session_state["download_task_history_page_size"] = 20
+    if "download_task_history_page" not in st.session_state:
+        st.session_state["download_task_history_page"] = 1
+
+    _control_cols = st.columns([1.3, 1.0, 1.0, 2.7])
+    with _control_cols[0]:
+        _selected_range = st.selectbox(
+            "Task history range",
+            options=list(_TASK_HISTORY_RANGE_OPTIONS.keys()),
+            key="download_task_history_range",
+        )
+    with _control_cols[1]:
+        _page_size = int(
+            st.selectbox(
+                "Task rows",
+                options=[20, 50, 100],
+                key="download_task_history_page_size",
+            )
+        )
+    _since_days = _TASK_HISTORY_RANGE_OPTIONS.get(_selected_range, _TASK_LIST_SINCE_DAYS)
+    _total_tasks = count_recent_tasks(session_id=_current_user, since_days=_since_days)
+    _page_count = max(1, (_total_tasks + _page_size - 1) // _page_size) if _total_tasks else 1
+    _current_page = min(max(1, int(st.session_state.get("download_task_history_page", 1))), _page_count)
+    st.session_state["download_task_history_page"] = _current_page
+    with _control_cols[2]:
+        _selected_page = st.selectbox(
+            "Task page",
+            options=list(range(1, _page_count + 1)),
+            index=_current_page - 1,
+            key="download_task_history_page_select",
+        )
+        if int(_selected_page) != _current_page:
+            _current_page = int(_selected_page)
+            st.session_state["download_task_history_page"] = _current_page
+    with _control_cols[3]:
+        _range_label = _selected_range if _since_days is not None else "all time"
+        st.caption(f"Showing **{_total_tasks}** tasks across **{_page_count}** page(s) for **{_range_label}**.")
+
+    _offset = (_current_page - 1) * _page_size
     _use_fragment = getattr(st, "fragment", None) is not None
     if _use_fragment:
         try:
             @st.fragment(run_every=timedelta(seconds=3))
             def _task_list_poll():
                 _t = list_recent_tasks(
-                    limit=_TASK_LIST_MAX_ROWS,
+                    limit=_page_size,
+                    offset=_offset,
                     session_id=_current_user,
-                    since_days=_TASK_LIST_SINCE_DAYS,
+                    since_days=_since_days,
                 )
-                _render_task_list(_t, _current_user)
+                render_task_list(_t, _current_user)
             _task_list_poll()
         except (TypeError, AttributeError):
             _use_fragment = False
     if not _use_fragment:
         tasks = list_recent_tasks(
-            limit=_TASK_LIST_MAX_ROWS,
+            limit=_page_size,
+            offset=_offset,
             session_id=_current_user,
-            since_days=_TASK_LIST_SINCE_DAYS,
+            since_days=_since_days,
         )
-        has_active = _render_task_list(tasks, _current_user)
+        has_active = render_task_list(tasks, _current_user)
         if st.button("Refresh task list", key="refresh_tasks"):
             st.rerun()
         if has_active:
@@ -1136,6 +943,1559 @@ def _run_eval_result_worker(result_dir: str, overwrite: bool) -> Dict[str, Any]:
     return run_eval_result_for_dir(result_dir, overwrite=overwrite)
 
 
+def _parse_api_dt(value: Any) -> Optional[datetime]:
+    """Parse evaluator API timestamps into timezone-aware datetimes."""
+    if value is None:
+        return None
+    if isinstance(value, datetime):
+        if getattr(value, "tzinfo", None) is None:
+            return value.replace(tzinfo=timezone.utc)
+        return value
+    try:
+        text = str(value).strip()
+        if not text:
+            return None
+        if text.endswith("Z"):
+            text = text[:-1] + "+00:00"
+        dt = datetime.fromisoformat(text)
+        if getattr(dt, "tzinfo", None) is None:
+            dt = dt.replace(tzinfo=timezone.utc)
+        return dt
+    except Exception:
+        return None
+
+
+def _format_jst_time(value: Any, *, include_seconds: bool = False) -> str:
+    """Format timestamps for display in JST."""
+    dt = _to_jst(_parse_api_dt(value))
+    if not dt:
+        return "—"
+    return dt.strftime("%Y-%m-%d %H:%M:%S JST" if include_seconds else "%Y-%m-%d %H:%M JST")
+
+
+def _format_jst_time_compact(value: Any) -> str:
+    """Compact timestamp for dense recent-job rows."""
+    dt = _to_jst(_parse_api_dt(value))
+    if not dt:
+        return "—"
+    return dt.strftime("%m-%d %H:%M")
+
+
+def _format_jst_time_title(value: Any) -> str:
+    """Readable timestamp for fallback job titles."""
+    dt = _to_jst(_parse_api_dt(value))
+    if not dt:
+        return "unknown time"
+    return f"{dt.year}/{dt.month}/{dt.day} {dt.hour}:{dt.minute:02d}:{dt.second:02d}"
+
+
+def _format_relative_time(value: Any) -> str:
+    """Human-friendly age/duration from a timestamp until now."""
+    dt = _parse_api_dt(value)
+    if not dt:
+        return "—"
+    now = datetime.now(timezone.utc)
+    secs = max(0, int((now - dt.astimezone(timezone.utc)).total_seconds()))
+    if secs < 60:
+        return f"{secs}s ago"
+    if secs < 3600:
+        return f"{secs // 60}m ago"
+    if secs < 86400:
+        return f"{secs // 3600}h ago"
+    return f"{secs // 86400}d ago"
+
+
+def _format_duration(start_value: Any, end_value: Any) -> str:
+    """Format elapsed duration between two evaluator timestamps."""
+    start = _parse_api_dt(start_value)
+    end = _parse_api_dt(end_value)
+    if not start or not end:
+        return "—"
+    secs = max(0, int((end - start).total_seconds()))
+    if secs < 60:
+        return f"{secs}s"
+    if secs < 3600:
+        return f"{secs // 60}m {secs % 60}s"
+    return f"{secs // 3600}h {(secs % 3600) // 60}m"
+
+
+def _extract_git_target(report: Dict[str, Any]) -> str:
+    """Return a compact branch/tag label from evaluator job report metadata."""
+    source = ((report.get("event") or {}).get("source") or {})
+    git_ref = str(source.get("git_ref") or "").strip()
+    if git_ref.startswith("refs/heads/"):
+        return git_ref[len("refs/heads/"):]
+    if git_ref.startswith("refs/tags/"):
+        return git_ref[len("refs/tags/"):]
+    return git_ref or str(source.get("git_sha") or "").strip()[:12] or "—"
+
+
+def _extract_catalog_url(report: Dict[str, Any]) -> str:
+    """Return a best-effort catalog URL for linking from recent evaluator jobs."""
+    catalog = report.get("catalog") or {}
+    direct_url = str(
+        catalog.get("web_url")
+        or catalog.get("url")
+        or catalog.get("catalog_url")
+        or ""
+    ).strip()
+    if direct_url:
+        return direct_url
+
+    project_id = str(report.get("project_id") or "").strip()
+    catalog_id = str(
+        catalog.get("catalog_id")
+        or catalog.get("id")
+        or ""
+    ).strip()
+    if project_id and catalog_id:
+        return f"https://evaluation.tier4.jp/evaluation/vehicle_catalogs/{catalog_id}?project_id={project_id}"
+    return ""
+
+
+def _extract_job_title(report: Dict[str, Any]) -> str:
+    """Prefer evaluator description for display title, with a readable fallback."""
+    description = str(report.get("description") or "").strip()
+    if description:
+        return description
+    started_like = report.get("started_at") or report.get("scheduled_at") or report.get("finished_at")
+    return f"no description (Started at {_format_jst_time_title(started_like)})"
+
+
+def _extract_case_totals(report: Dict[str, Any]) -> Dict[str, int]:
+    """Return total/success/failed/canceled counts from job report."""
+    test = report.get("test") or {}
+    result = test.get("available_case_results") or test.get("case_results") or {}
+    return {
+        "total": int(result.get("total_count", 0) or 0),
+        "success": int(result.get("success_count", 0) or 0),
+        "failed": int(result.get("failure_count", 0) or 0),
+        "canceled": int(result.get("cancellation_count", 0) or 0),
+    }
+
+
+def _extract_failed_case_rows(case_reports: List[Dict[str, Any]], *, limit: int = 50) -> List[Dict[str, Any]]:
+    """Normalize failed case rows for display tables."""
+    rows: List[Dict[str, Any]] = []
+    for report in case_reports:
+        status = str(report.get("status") or "").strip().lower()
+        result_status = str(((report.get("result") or {}).get("status") or "")).strip().lower()
+        if status not in evaluator_api.FAILED_JOB_STATUSES and result_status not in evaluator_api.FAILED_JOB_STATUSES:
+            continue
+        logs = report.get("logs") or {}
+        rows.append(
+            {
+                "Suite": ((report.get("suite") or {}).get("display_name") or ""),
+                "Scenario": ((report.get("scenario") or {}).get("display_name") or ""),
+                "Status": report.get("status", ""),
+                "Fail message": report.get("fail_message", ""),
+                "Cause": ", ".join(report.get("failure_cause_labels", []) or []),
+                "Archive log": "yes" if ((logs.get("simulation_archive") or {}).get("id")) else "no",
+                "Result JSON": "yes" if ((logs.get("simulation_result_json") or {}).get("id")) else "no",
+            }
+        )
+    rows.sort(key=lambda row: (row["Suite"], row["Scenario"], row["Fail message"]))
+    return rows[:limit]
+
+
+def _extract_suite_rows(suite_rows: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+    """Normalize suite summary rows for display tables."""
+    rows = [
+        {
+            "Suite": row.get("name", ""),
+            "Total": int(row.get("all", 0) or 0),
+            "Success": int(row.get("success", 0) or 0),
+            "Failed": int(row.get("fail", 0) or 0),
+            "Canceled": int(row.get("cancel", 0) or 0),
+            "Simulation": row.get("simulation", ""),
+            "Report": row.get("url", ""),
+        }
+        for row in suite_rows or []
+    ]
+    rows.sort(key=lambda row: (-row["Failed"], row["Suite"]))
+    return rows
+
+
+def _extract_suite_selection_options(suite_rows: List[Dict[str, Any]]) -> List[Dict[str, str]]:
+    """Derive de-duplicated suite picker entries from suite summary rows.
+
+    The suite id is the path segment after ``/tests/`` in the row's report URL.
+    """
+    picker: List[Dict[str, str]] = []
+    known = set()
+    for entry in suite_rows or []:
+        link = str(entry.get("url") or entry.get("Report") or "").strip()
+        tail = link.partition("/tests/")[2]
+        ident = tail.partition("?")[0].partition("/")[0].strip()
+        if ident and ident not in known:
+            known.add(ident)
+            title = str(entry.get("name") or entry.get("Suite") or ident).strip()
+            picker.append({"id": ident, "label": f"{title} ({ident})"})
+    return picker
+
+
+def _status_color_variant(status: str) -> str:
+    """Pick the style token that colors a recent-job card for an evaluator status."""
+    key = evaluator_api.normalize_job_status(status)
+    if key in evaluator_api.SUCCESS_JOB_STATUSES:
+        token = "success"
+    elif key in ("canceled", "cancelled", "aborted"):
+        token = "canceled"
+    elif key in evaluator_api.FAILED_JOB_STATUSES:
+        token = "failed"
+    else:
+        token = "running" if key in ("started", "running", "pending", "queued", "created") else "unknown"
+    return token
+
+
+def _status_display_label(status: str) -> str:
+    """Collapse an evaluator status into the short label shown in compact list rows."""
+    key = evaluator_api.normalize_job_status(status)
+    groups = (
+        ("success", ("succeeded", "success")),
+        ("failed", ("failed", "failure", "error")),
+        ("canceled", ("canceled", "cancelled", "aborted")),
+        ("running", ("started", "running")),
+        ("queued", ("pending", "queued", "created")),
+    )
+    for label, aliases in groups:
+        if key in aliases:
+            return label
+    return key or "unknown"
+
+
+def _status_filter_values(selected_statuses: List[str]) -> List[str]:
+    """Expand UI status filters into API status values.
+
+    Returns the sorted, de-duplicated values; blank and "unknown" entries are dropped.
+    """
+    aliases = {
+        "running": ["running", "started"],
+        "success": ["success", "succeeded"],
+        "failed": ["failed", "failure", "error"],
+        "canceled": ["canceled", "cancelled", "aborted"],
+    }
+    collected = set()
+    for choice in selected_statuses:
+        key = evaluator_api.normalize_job_status(choice)
+        if not key or key == "unknown":
+            continue
+        collected.update(aliases.get(key, [key]))
+    return sorted(collected)
+
+
+def _escape_search_match_value(value: str) -> str:
+    """Escape wildcard characters for API Match filters.
+
+    A backslash is put in front of every backslash, ``*`` and ``?``.
+    """
+    escapes = str.maketrans({"\\": "\\\\", "*": "\\*", "?": "\\?"})
+    return value.translate(escapes)
+
+
+def _build_recent_job_search_filter(
+    search_text: str,
+    search_scope: str,
+    user_directory: Optional[Dict[str, Dict[str, str]]] = None,
+) -> tuple[Optional[Dict[str, Any]], str]:
+    """Translate the quick-search box into a server filter plus a client needle.
+
+    The scopes "Branch/tag", "Description", "Git SHA" and "Fail message" become a
+    wildcard ``Match`` on one report field; "Job ID" becomes an exact ``In``.
+
+    Args:
+        search_text: Text typed by the user; surrounding whitespace is ignored.
+        search_scope: UI scope label that selects the field to search.
+        user_directory: Accepted for call compatibility; not read here.
+
+    Returns:
+        ``(filter, needle)``. ``filter`` is a search filter dict, or ``None`` when
+        the text is blank or the scope has no server-side field. ``needle`` is
+        the lower-cased text, or ``""`` when the text is blank.
+    """
+    query = search_text.strip()
+    if not query:
+        return None, ""
+    lowered = query.lower()
+
+    # Scopes searched by wildcard substring, mapped to the report field that
+    # the server-side filter targets.
+    wildcard_fields = {
+        "Branch/tag": "event.source.git_ref",
+        "Description": "description",
+        "Git SHA": "event.source.git_sha",
+        "Fail message": "fail_message",
+    }
+    if search_scope in wildcard_fields:
+        # Escape the wildcard characters so the typed text is matched literally.
+        escaped = _escape_search_match_value(query)
+        server_filter = {
+            "field": wildcard_fields[search_scope],
+            "operator": "Match",
+            "values": [f"*{escaped}*"],
+        }
+        return server_filter, lowered
+
+    # A job id is matched exactly with ``In``, so the text is sent as typed,
+    # without surrounding wildcards or escaping.
+    if search_scope == "Job ID":
+        server_filter = {
+            "field": "job_id",
+            "operator": "In",
+            "values": [query],
+        }
+        return server_filter, lowered
+
+    # Any other scope has no server-side filter; only the lower-cased needle
+    # is handed back.
+    return None, lowered
+
+
+def _recent_job_search_history_key(scope: str) -> str:  # config key for one scope's search history
+    return "recent_eval_jobs_search_history::" + format(scope)
+
+
+def _get_recent_job_search_history(scope: str) -> List[str]:
+    """Load the saved search terms for ``scope``, trimmed and with blanks dropped."""
+    saved = get_config_value(_recent_job_search_history_key(scope), []) or []
+    cleaned = (str(item).strip() for item in saved) if isinstance(saved, list) else ()
+    return [term for term in cleaned if term]
+
+
+def _save_recent_job_search_history(scope: str, value: str, *, max_items: int = 8) -> None:
+    """Put ``value`` at the front of the scope's history, de-duplicated and capped."""
+    term = str(value).strip()
+    if term:
+        older = [item for item in _get_recent_job_search_history(scope) if item != term]
+        newest_first = [term, *older]
+        set_config_value(_recent_job_search_history_key(scope), newest_first[:max_items])
+
+
+def _get_recent_eval_user_directory() -> Dict[str, Dict[str, str]]:
+    """Load the stored subject-id to profile map, coercing every field to ``str``."""
+    raw = get_config_value("recent_eval_jobs_user_directory", {}) or {}
+    if not isinstance(raw, dict):
+        return {}
+    return {
+        str(key): {
+            "name": str(profile.get("name") or "").strip(),
+            "email": str(profile.get("email") or "").strip(),
+            "subject_id": str(profile.get("subject_id") or key).strip(),
+        }
+        for key, profile in raw.items()
+        if isinstance(profile, dict)
+    }
+
+
+def _save_recent_eval_user_directory(directory: Dict[str, Dict[str, str]]) -> None:  # store the whole map
+    set_config_value("recent_eval_jobs_user_directory", directory)
+
+
+@st.cache_data(ttl=24 * 3600, show_spinner=False)
+def _fetch_auth_member_profile(subject_id: str, environment: str) -> Dict[str, str]:
+    """Look up one organization member's name and email from the auth service.
+
+    Returns ``{}`` for a blank subject id and a name-only placeholder when the
+    organization id is empty. HTTP errors propagate via ``raise_for_status``.
+    """
+    member = str(subject_id or "").strip()
+    if not member:
+        return {}
+    default_org = "5a21621d-6968-4f7d-94f8-99cfb77b6e71"
+    organization = os.environ.get("WEBAUTO_ORGANIZATION_ID", default_org).strip()
+    if not organization:
+        return {"subject_id": member, "name": member, "email": ""}
+    os.environ["AUTH_PROFILE"] = environment or ENVIRONMENT
+    from webautoauth.token import HttpService, TokenSource, load_config
+
+    bearer = TokenSource(HttpService(load_config())).get_token().access_token
+    member_path = urllib.parse.quote(member, safe="")
+    response = requests.get(
+        f"https://auth.web.auto/v2/organizations/{organization}/members/{member_path}",
+        headers={"Authorization": f"Bearer {bearer}", "accept": "application/json"},
+        timeout=10,
+    )
+    response.raise_for_status()
+    payload = response.json()
+    return {
+        "subject_id": str(payload.get("subject_id") or member),
+        "name": str(payload.get("name") or member).strip(),
+        "email": str(payload.get("email") or "").strip(),
+    }
+
+
+def _hydrate_recent_eval_user_directory(
+    jobs: List[Dict[str, Any]],
+    environment: str,
+) -> Dict[str, Dict[str, str]]:
+    """Resolve unseen ``scheduled_by`` ids to profiles and store the merged map.
+
+    Lookups run concurrently. An id whose lookup raises is recorded with the id
+    itself as its name and an empty email.
+
+    Args:
+        jobs: Dicts read for their ``scheduled_by`` value.
+        environment: Passed through to ``_fetch_auth_member_profile``.
+    """
+    directory = _get_recent_eval_user_directory()
+    schedulers = {str(job.get("scheduled_by") or "").strip() for job in jobs}
+    unresolved = sorted(sid for sid in schedulers if sid and sid not in directory)
+    if not unresolved:
+        return directory
+
+    updates: Dict[str, Dict[str, str]] = {}
+    with ThreadPoolExecutor(max_workers=min(6, len(unresolved))) as pool:
+        pending = {}
+        for sid in unresolved:
+            pending[pool.submit(_fetch_auth_member_profile, sid, environment)] = sid
+        for finished in as_completed(pending):
+            sid = pending[finished]
+            try:
+                fetched = finished.result()
+            except Exception:
+                # Best effort: keep the raw id as the display name.
+                fetched = {"subject_id": sid, "name": sid, "email": ""}
+            updates[sid] = {
+                "subject_id": str(fetched.get("subject_id") or sid).strip(),
+                "name": str(fetched.get("name") or sid).strip(),
+                "email": str(fetched.get("email") or "").strip(),
+            }
+
+    # Stored so these ids count as resolved the next time the directory is loaded.
+    if updates:
+        directory = {**directory, **updates}
+        _save_recent_eval_user_directory(directory)
+    return directory
+
+
+def _build_recent_job_date_filters(
+    date_from: Optional[datetime.date],
+    date_to: Optional[datetime.date],
+) -> List[Dict[str, Any]]:
+    """Turn an optional date range into ``scheduled_at`` search filters.
+
+    ``date_from`` is taken at 00:00:00 and ``date_to`` at 23:59:59 in the ``_JST``
+    zone; both bounds are sent as UTC ISO-8601 strings.
+    """
+
+    # One filter dict for a single edge of the range.
+    def _bound(operator: str, day: Any, hour: int, minute: int, second: int) -> Dict[str, Any]:
+        local = datetime(day.year, day.month, day.day, hour, minute, second, tzinfo=_JST)
+        return {
+            "field": "scheduled_at",
+            "operator": operator,
+            "values": [local.astimezone(timezone.utc).isoformat()],
+        }
+
+    bounds: List[Dict[str, Any]] = []
+    if date_from:
+        bounds.append(_bound("Gte", date_from, 0, 0, 0))
+    if date_to:
+        bounds.append(_bound("Lte", date_to, 23, 59, 59))
+    return bounds
+
+
+def _summarize_recent_job(report: Dict[str, Any]) -> Dict[str, Any]:
+    """Flatten one evaluator job report into the fields shown on a job card."""
+    job_status = evaluator_api.extract_job_status(report)
+    case_totals = _extract_case_totals(report)
+    event_source = (report.get("event") or {}).get("source") or {}
+    repo_url = str(event_source.get("git_web_url") or event_source.get("git_url") or "").strip()
+    ref_label = _extract_git_target(report)
+    job_id = report.get("job_id") or report.get("id") or ""
+    return {
+        "job_id": job_id,
+        "title": _extract_job_title(report),
+        "status": job_status,
+        "status_variant": _status_color_variant(job_status),
+        "build_status": (report.get("build") or {}).get("status") or "",
+        "test_status": (report.get("test") or {}).get("status") or "",
+        "target": ref_label,
+        "catalog": (report.get("catalog") or {}).get("display_name") or "",
+        "catalog_url": _extract_catalog_url(report),
+        "description": report.get("description", ""),
+        "source_label": ref_label,
+        "source_repo_label": repo_url.rstrip("/").split("/")[-1] if repo_url else "—",
+        "scheduled_at": report.get("scheduled_at"),
+        "started_at": report.get("started_at"),
+        "finished_at": report.get("finished_at"),
+        "duration": _format_duration(report.get("started_at"), report.get("finished_at")),
+        "created_label": _format_relative_time(report.get("scheduled_at") or report.get("started_at")),
+        "scheduled_by": str(report.get("scheduled_by") or ""),
+        "report_url": evaluator_api.get_job_report_url(report.get("project_id", ""), job_id),
+        "fail_message": report.get("fail_message", ""),
+        "total": case_totals["total"],
+        "success": case_totals["success"],
+        "failed": case_totals["failed"],
+        "canceled": case_totals["canceled"],
+        "git_sha": str(event_source.get("git_sha") or "")[:12],
+        "git_ref_url": event_source.get("git_ref_url", ""),
+        "git_commit_url": event_source.get("git_commit_url", ""),
+        "source_url": repo_url,
+    }
+
+
+@st.cache_data(ttl=30, show_spinner=False)
+def _fetch_recent_evaluator_job_pages(
+    project_id: str,
+    environment: str,
+    page_size: int,
+    pages_to_fetch: int,
+    status_values: tuple[str, ...] = (),
+    extra_filters: tuple[tuple[str, str, tuple[Any, ...]], ...] = (),
+) -> List[Dict[str, Any]]:
+    """Fetch up to ``pages_to_fetch`` pages of recent jobs from the search endpoint.
+
+    Each returned entry holds the summarized ``jobs`` of one page and the
+    ``next_token`` that came with it; paging stops once that token is empty.
+
+    Args:
+        project_id: Project to search; an empty value returns ``[]``.
+        environment: Written to ``AUTH_PROFILE``; ``ENVIRONMENT`` is used when empty.
+        page_size: Requested page size, clamped to the range 1..100.
+        pages_to_fetch: Maximum number of pages to request (at least one).
+        status_values: Statuses for an ``In`` filter on ``status``.
+        extra_filters: ``(field, operator, values)`` triples added as filters.
+    """
+    if not project_id:
+        return []
+    os.environ["AUTH_PROFILE"] = environment or ENVIRONMENT
+    client = evaluator_api.EvaluationRunAPI()
+    criteria: List[Dict[str, Any]] = []
+    if status_values:
+        criteria.append({"field": "status", "operator": "In", "values": list(status_values)})
+    criteria.extend(
+        {"field": name, "operator": op, "values": list(vals)}
+        for name, op, vals in extra_filters
+    )
+    # Both limits stay fixed for the whole loop, so they are computed once.
+    page_budget = max(1, int(pages_to_fetch))
+    page_limit = max(1, min(int(page_size), 100))
+    cursor = ""
+    collected: List[Dict[str, Any]] = []
+    for _ in range(page_budget):
+        payload = client.search_report_list(
+            project_id,
+            filters=criteria or None,
+            next_token=cursor,
+            size=page_limit,
+        )
+        summaries = [_summarize_recent_job(item) for item in payload.get("reports", []) or []]
+        cursor = payload.get("next_token", "") or ""
+        collected.append({"jobs": summaries, "next_token": cursor})
+        if not cursor:
+            break
+    return collected
+
+
+@st.cache_data(ttl=30, show_spinner=False)
+def _fetch_evaluator_job_detail(project_id: str, environment: str, job_id: str) -> Dict[str, Any]:
+    """Load the report, suite summary and case reports behind one job's detail view."""
+    if not (project_id and job_id):
+        return {}
+    os.environ["AUTH_PROFILE"] = environment or ENVIRONMENT
+    client = evaluator_api.EvaluationRunAPI()
+    job_report = client.get_job_report(project_id, job_id)
+    suites = client.get_suite_summary(project_id, job_id, use_available_case_results=True)
+    cases = client.get_case_reports(project_id, job_id)
+    detail = dict(_summarize_recent_job(job_report))
+    detail.update(
+        suite_rows=_extract_suite_rows(suites),
+        failed_case_rows=_extract_failed_case_rows(cases),
+        raw_report=job_report,
+    )
+    return detail
+
+
+def _inject_recent_evaluator_jobs_styles() -> None:
+    """Emit the ``<style>`` block for the recent evaluator jobs rows, detail panel and buttons."""
+    st.markdown(
+        """
+        <style>
+        .evj-card {
+            border-radius: 16px;
+            padding: 0.7rem 0.85rem;
+            border: 1px solid rgba(148, 163, 184, 0.22);
+            background: rgba(255, 255, 255, 0.92);
+            box-shadow: 0 8px 20px rgba(15, 23, 42, 0.05);
+        }
+        .evj-card--running {
+            border-color: rgba(245, 158, 11, 0.28);
+            background: linear-gradient(180deg, rgba(255, 251, 235, 0.98), rgba(255,255,255,0.98));
+        }
+        .evj-card--success {
+            border-color: rgba(16, 185, 129, 0.24);
+            background: linear-gradient(180deg, rgba(236, 253, 245, 0.98), rgba(255,255,255,0.98));
+        }
+        .evj-card--failed {
+            border-color: rgba(239, 68, 68, 0.24);
+            background: linear-gradient(180deg, rgba(254, 242, 242, 0.98), rgba(255,255,255,0.98));
+        }
+        .evj-top, .evj-meta, .evj-stats {
+            display: flex;
+            align-items: center;
+            gap: 8px;
+            flex-wrap: wrap;
+        }
+        .evj-top { justify-content: space-between; }
+        .evj-row {
+            display: grid;
+            grid-template-columns: minmax(180px, 1.3fr) minmax(86px, 0.5fr) minmax(108px, 0.7fr) minmax(180px, 1.15fr) minmax(130px, 0.9fr) minmax(180px, 1.1fr);
+            gap: 8px;
+            align-items: center;
+        }
+        .evj-title {
+            font-size: 0.9rem;
+            font-weight: 800;
+            color: #0f172a;
+            margin: 0;
+            word-break: break-word;
+        }
+        .evj-title a {
+            color: inherit;
+            text-decoration: none;
+        }
+        .evj-title a:hover {
+            text-decoration: underline;
+        }
+        .evj-name {
+            min-width: 0;
+        }
+        .evj-name .evj-title,
+        .evj-name .evj-name-sub,
+        .evj-ref-cell,
+        .evj-ref-cell .evj-name-sub {
+            white-space: nowrap;
+            overflow: hidden;
+            text-overflow: ellipsis;
+        }
+        .evj-name-sub {
+            margin-top: 0.15rem;
+            font-size: 0.74rem;
+            color: #64748b;
+        }
+        .evj-status {
+            display: inline-flex;
+            align-items: center;
+            gap: 5px;
+            padding: 0.24rem 0.5rem;
+            border-radius: 999px;
+            font-size: 0.7rem;
+            font-weight: 800;
+            text-transform: lowercase;
+            letter-spacing: 0.01em;
+            border: 1px solid transparent;
+        }
+        .evj-status--running { color: #9a6700; background: #fff7db; border-color: rgba(245, 158, 11, 0.28); }
+        .evj-status--success { color: #047857; background: #dcfce7; border-color: rgba(16, 185, 129, 0.28); }
+        .evj-status--failed { color: #b91c1c; background: #fee2e2; border-color: rgba(239, 68, 68, 0.28); }
+        .evj-status--canceled { color: #7c3aed; background: #f3e8ff; border-color: rgba(124, 58, 237, 0.24); }
+        .evj-status--unknown { color: #475569; background: #f1f5f9; border-color: rgba(148, 163, 184, 0.28); }
+        .evj-status-mark {
+            display: inline-flex;
+            align-items: center;
+            justify-content: center;
+            width: 14px;
+            height: 14px;
+            border-radius: 999px;
+            font-size: 0.62rem;
+            font-weight: 900;
+            line-height: 1;
+            border: 1px solid currentColor;
+            flex: 0 0 auto;
+        }
+        .evj-status-mark--success {
+            background: rgba(4, 120, 87, 0.08);
+        }
+        .evj-status-mark--failed {
+            background: rgba(185, 28, 28, 0.08);
+        }
+        .evj-status-mark--canceled {
+            background: rgba(124, 58, 237, 0.08);
+        }
+        .evj-status-mark--unknown {
+            background: rgba(71, 85, 105, 0.08);
+        }
+        .evj-status-mark--running {
+            position: relative;
+            border-radius: 999px;
+            border: 1.5px solid rgba(154, 103, 0, 0.18);
+            border-top-color: currentColor;
+            border-right-color: currentColor;
+            background: transparent;
+            animation: evj-spin 0.9s linear infinite;
+        }
+        .evj-dot {
+            width: 8px;
+            height: 8px;
+            border-radius: 999px;
+            display: inline-block;
+            background: currentColor;
+            opacity: 0.88;
+        }
+        .evj-dot--pulse {
+            animation: evj-pulse 1.4s ease-in-out infinite;
+        }
+        @keyframes evj-pulse {
+            0% { transform: scale(0.9); opacity: 0.55; }
+            50% { transform: scale(1.2); opacity: 1; }
+            100% { transform: scale(0.9); opacity: 0.55; }
+        }
+        @keyframes evj-spin {
+            0% { transform: rotate(0deg); }
+            100% { transform: rotate(360deg); }
+        }
+        .evj-meta {
+            color: #475569;
+            font-size: 0.82rem;
+        }
+        .evj-list {
+            display: flex;
+            flex-direction: column;
+            gap: 8px;
+            margin-top: 0.7rem;
+        }
+        .evj-toolbar-note {
+            margin: 0.15rem 0 0.35rem;
+            font-size: 0.72rem;
+            font-weight: 700;
+            letter-spacing: 0.02em;
+            color: #64748b;
+            text-transform: uppercase;
+        }
+        .evj-pager-note {
+            margin-top: 0.28rem;
+            font-size: 0.76rem;
+            color: #475569;
+            white-space: nowrap;
+        }
+        .evj-cell {
+            min-width: 0;
+            font-size: 0.78rem;
+            color: #334155;
+        }
+        .evj-cell a {
+            color: #0f766e;
+            text-decoration: none;
+            font-weight: 700;
+        }
+        .evj-cell a:hover {
+            text-decoration: underline;
+        }
+        .evj-cell strong {
+            color: #0f172a;
+        }
+        .evj-cell--nowrap {
+            white-space: nowrap;
+        }
+        .evj-detail {
+            margin-top: 1rem;
+            padding: 1rem 1rem 0.8rem;
+            border-radius: 18px;
+            border: 1px solid rgba(15, 118, 110, 0.14);
+            background:
+                radial-gradient(circle at top right, rgba(45, 212, 191, 0.10), transparent 24%),
+                linear-gradient(180deg, rgba(255,255,255,0.99), rgba(247,250,252,0.99));
+            box-shadow: 0 14px 30px rgba(15, 23, 42, 0.06);
+        }
+        .evj-stat {
+            flex: 1 1 80px;
+            min-width: 72px;
+            padding: 0.55rem 0.7rem;
+            border-radius: 14px;
+            background: rgba(248, 250, 252, 0.92);
+            border: 1px solid rgba(148, 163, 184, 0.16);
+        }
+        .evj-inline-stats {
+            display: flex;
+            flex-wrap: wrap;
+            gap: 6px;
+            font-size: 0.76rem;
+            color: #334155;
+        }
+        [class*="st-key-recent_eval_view_"] button,
+        [class*="st-key-recent_eval_run_"] button,
+        [class*="st-key-recent_eval_jobs_prev"] button,
+        [class*="st-key-recent_eval_jobs_next"] button,
+        [class*="st-key-recent_eval_jobs_pagebtn_"] button,
+        [class*="st-key-refresh_recent_eval_jobs"] button {
+            min-height: 2rem;
+            padding: 0.18rem 0.58rem;
+            border-radius: 999px;
+            font-size: 0.72rem;
+            font-weight: 700;
+            box-shadow: none;
+        }
+        [class*="st-key-recent_eval_view_"] button,
+        [class*="st-key-recent_eval_jobs_prev"] button,
+        [class*="st-key-recent_eval_jobs_next"] button,
+        [class*="st-key-recent_eval_jobs_pagebtn_"] button,
+        [class*="st-key-refresh_recent_eval_jobs"] button {
+            border-color: rgba(148, 163, 184, 0.34);
+            color: #334155;
+            background: #ffffff;
+        }
+        [class*="st-key-recent_eval_view_"] button:hover,
+        [class*="st-key-recent_eval_jobs_prev"] button:hover,
+        [class*="st-key-recent_eval_jobs_next"] button:hover,
+        [class*="st-key-recent_eval_jobs_pagebtn_"] button:hover,
+        [class*="st-key-refresh_recent_eval_jobs"] button:hover {
+            border-color: rgba(15, 118, 110, 0.28);
+            color: #0f766e;
+            background: #f8fffd;
+        }
+        [class*="st-key-recent_eval_jobs_pagebtn_active_"] button {
+            border-color: rgba(13, 148, 136, 0.26);
+            background: linear-gradient(180deg, #f0fdfa, #ecfeff);
+            color: #0f766e;
+        }
+        [class*="st-key-recent_eval_run_"] button {
+            border-color: rgba(13, 148, 136, 0.22);
+            background: linear-gradient(180deg, #f0fdfa, #ecfeff);
+            color: #0f766e;
+        }
+        [class*="st-key-recent_eval_run_"] button:hover {
+            border-color: rgba(13, 148, 136, 0.34);
+            background: linear-gradient(180deg, #ccfbf1, #ecfeff);
+            color: #115e59;
+        }
+        .evj-stat-label {
+            display: block;
+            font-size: 0.68rem;
+            letter-spacing: 0.06em;
+            text-transform: uppercase;
+            color: #64748b;
+            font-weight: 800;
+            margin-bottom: 0.14rem;
+        }
+        .evj-stat-value {
+            display: block;
+            font-size: 1rem;
+            font-weight: 800;
+            color: #0f172a;
+        }
+        .evj-desc {
+            margin-top: 0.55rem;
+            font-size: 0.86rem;
+            color: #334155;
+        }
+        .evj-empty {
+            padding: 1rem 1.1rem;
+            border-radius: 18px;
+            background: #f8fafc;
+            border: 1px dashed rgba(148, 163, 184, 0.4);
+            color: #475569;
+        }
+        @media (max-width: 1080px) {
+            .evj-row {
+                grid-template-columns: 1fr;
+                gap: 8px;
+            }
+        }
+        </style>
+        """,
+        unsafe_allow_html=True,
+    )
+
+
+def _render_recent_evaluator_job_card(job: Dict[str, Any], *, user_label: str = "Unknown") -> None:
+    """Render one recent evaluator job as a single-row list item.
+
+    Every value placed in the markup is HTML-escaped first. Missing, empty or
+    ``None`` text fields render as an em dash and ``None`` counts as 0.
+    """
+    # Escape one job field, substituting ``fallback`` when it is missing or empty.
+    def _text(key: str, fallback: str = "—") -> str:
+        return html.escape(str(job.get(key) or fallback))
+
+    def _link(url: str, label: str) -> str:
+        return f'<a href="{url}" target="_blank" rel="noopener noreferrer">{label}</a>' if url else label
+
+    variant = _text("status_variant", "unknown")
+    status = html.escape(_status_display_label(job.get("status", "unknown") or "unknown"))
+    title_text = _text("title")
+    catalog = _text("catalog")
+    scheduled = html.escape(_format_jst_time_compact(job.get("scheduled_at")))
+    duration = _text("duration")
+    meta_line = _text("job_id", "")
+    build_status = _text("build_status")
+    test_status = _text("test_status")
+    created_label = _text("created_label")
+    git_sha = _text("git_sha")
+    source_label = _text("source_label")
+    user_text = html.escape(user_label or "Unknown")
+    source_url = html.escape(str(job.get("git_ref_url") or job.get("source_url") or ""))
+    status_mark = {
+        "running": '<span class="evj-status-mark evj-status-mark--running" aria-hidden="true"></span>',
+        "success": '<span class="evj-status-mark evj-status-mark--success" aria-hidden="true">✓</span>',
+        "failed": '<span class="evj-status-mark evj-status-mark--failed" aria-hidden="true">!</span>',
+        "canceled": '<span class="evj-status-mark evj-status-mark--canceled" aria-hidden="true">×</span>',
+    }.get(variant, '<span class="evj-status-mark evj-status-mark--unknown" aria-hidden="true">?</span>')
+    counts = (
+        f'S <strong>{int(job.get("success") or 0)}</strong> · '
+        f'F <strong>{int(job.get("failed") or 0)}</strong> · '
+        f'C <strong>{int(job.get("canceled") or 0)}</strong> / '
+        f'<strong>{int(job.get("total") or 0)}</strong>'
+    )
+    title_html = _link(_text("report_url", ""), title_text)
+    source_html = _link(source_url, source_label)
+    catalog_html = _link(_text("catalog_url", ""), catalog)
+    st.markdown(
+        f"""
+        <div class="evj-card evj-card--{variant}">
+          <div class="evj-row">
+            <div class="evj-name">
+              <div class="evj-title">{title_html}</div>
+              <div class="evj-name-sub">{meta_line}</div>
+            </div>
+            <div class="evj-cell evj-cell--nowrap">
+              <span class="evj-status evj-status--{variant}">{status_mark}{status}</span>
+            </div>
+            <div class="evj-cell">
+              <strong>{scheduled}</strong><br><span class="evj-name-sub">{duration} · {created_label}</span>
+            </div>
+            <div class="evj-cell evj-ref-cell">
+              <strong>{catalog_html}</strong><br><span class="evj-name-sub">{source_html}</span>
+            </div>
+            <div class="evj-cell evj-ref-cell">
+              <strong>{user_text}</strong>
+            </div>
+            <div class="evj-cell">
+              <span class="evj-name-sub">build {build_status} · test {test_status} · {git_sha}</span><br>
+              <span class="evj-inline-stats">{counts}</span>
+            </div>
+          </div>
+        </div>
+        """,
+        unsafe_allow_html=True,
+    )
+
+
+def _render_recent_evaluator_job_detail(project_id: str, environment: str, job: Dict[str, Any]) -> None:
+    """Show one job's overview, link buttons, suite table, failed cases and raw JSON.
+
+    Emits a warning when the job has no id and an error when the detail fetch
+    raises; nothing else is drawn in either case.
+    """
+    job_id = str(job.get("job_id", "") or "")
+    if not job_id:
+        st.warning("Missing job id.")
+        return
+    try:
+        detail = _fetch_evaluator_job_detail(project_id, environment, job_id)
+    except Exception as e:
+        st.error(f"Could not fetch evaluator details: {e}")
+        return
+
+    st.markdown("**Overview**")
+    metric_keys = (("Total", "total"), ("Success", "success"), ("Failed", "failed"), ("Canceled", "canceled"))
+    for column, (label, key) in zip(st.columns(4), metric_keys):
+        column.metric(label, int(detail.get(key, 0)))
+
+    def _stamp(key: str) -> str:
+        return _format_jst_time(detail.get(key), include_seconds=True)
+
+    overview_left, overview_right = st.columns([1.3, 1.1])
+    with overview_left:
+        st.write(f"Status: `{detail.get('status', 'unknown')}`")
+        st.write(f"Title: `{detail.get('title', '—')}`")
+        st.write(f"Build/Test: `{detail.get('build_status', '—')}` / `{detail.get('test_status', '—')}`")
+        st.write(f"Ref: `{detail.get('target', '—')}`")
+        st.write(f"Catalog: `{detail.get('catalog', '—')}`")
+        st.write(f"Repo: `{detail.get('source_repo_label', '—')}`")
+    with overview_right:
+        st.write(f"Scheduled: `{_stamp('scheduled_at')}`")
+        st.write(f"Started: `{_stamp('started_at')}`")
+        st.write(f"Finished: `{_stamp('finished_at')}`")
+        st.write(f"Duration: `{detail.get('duration', '—')}`")
+        st.write(f"SHA: `{detail.get('git_sha', '—')}`")
+
+    # Each link button sits in its own column and is drawn only when its URL is set.
+    links = (
+        ("Open report", detail.get("report_url", "")),
+        ("Open catalog", detail.get("catalog_url", "")),
+        ("Open source", detail.get("source_url", "") or detail.get("git_ref_url", "")),
+    )
+    for slot, (caption, target) in zip(st.columns([1.2, 1.2, 4]), links):
+        with slot:
+            if target:
+                st.link_button(caption, target, use_container_width=True)
+
+    if detail.get("fail_message"):
+        st.warning(detail.get("fail_message"))
+
+    # Both tables share one layout: an expander that starts open only when it has rows.
+    sections = (
+        ("Suites", detail.get("suite_rows") or [], "No suite summary available."),
+        ("Failed Cases", detail.get("failed_case_rows") or [], "No failed cases in the current report."),
+    )
+    for heading, table_rows, empty_note in sections:
+        with st.expander(f"{heading} ({len(table_rows)})", expanded=bool(table_rows)):
+            if table_rows:
+                st.dataframe(pd.DataFrame(table_rows), width="stretch", hide_index=True)
+            else:
+                st.caption(empty_note)
+
+    with st.expander("Raw JSON", expanded=False):
+        st.json(detail.get("raw_report", {}))
+
+
+def _render_recent_evaluator_job_run_dialog(
+    project_id: str,
+    environment: str,
+    job: Dict[str, Any],
+    *,
+    output_path_default: str,
+    download_type_default: str,
+    phase_default: str,
+    skip_large_file_default: bool,
+    large_file_mb_default: float,
+    keep_zip_files_default: bool,
+) -> None:
+    """Render the options UI that enqueues a Download + Eval + Parquet task for one job.
+
+    ``job`` only needs ``job_id``; the keyword-only ``*_default`` values seed the widgets.
+    "Start" persists the choices via ``set_config_value`` and enqueues ``download_and_eval``.
+    """
+    job_id = str(job.get("job_id", "") or "")
+    if not job_id:
+        st.error("Missing evaluator job id.")
+        return
+
+    detail = _fetch_evaluator_job_detail(project_id, environment, job_id)
+    suite_options = _extract_suite_selection_options(detail.get("suite_rows") or [])
+    suite_label_to_id = {opt["label"]: opt["id"] for opt in suite_options}
+    suite_labels = [opt["label"] for opt in suite_options]
+
+    st.caption("Confirm the workflow options for this evaluator job, then start a background task.")
+    summary_cols = st.columns([1.45, 1.15, 1.35, 1.05])
+    summary_cols[0].markdown(f"**Title**  \n`{detail.get('title', '—')}`")
+    summary_cols[1].markdown(f"**Status**  \n`{detail.get('status', 'unknown')}`")
+    summary_cols[2].markdown(f"**Catalog**  \n`{detail.get('catalog', '—')}`")
+    summary_cols[3].markdown(f"**Cases**  \n`{int(detail.get('total') or 0)}`")  # tolerate a None total
+
+    # Kept outside the form: widgets inside st.form do not rerun the script until submit,
+    # so the archive-only options below could never react to a changed download type.
+    run_download_type = st.radio(
+        "Download type",
+        ["Archives (ZIP)", "Result JSON only"],
+        index=0 if download_type_default == "Archives (ZIP)" else 1,
+        horizontal=True,
+        key=f"recent_eval_run_download_type_{job_id}",
+    )
+
+    with st.form(key=f"recent_eval_run_form_{job_id}", border=False):
+        run_output_path = st.text_input(
+            "Output path",
+            value=output_path_default,
+            help="Folder under the data directory. This uses the same safe path rules as the main download workflow.",
+        )
+
+        if not suite_labels:
+            hint_cols = st.columns([1.2, 2.8])
+            if hint_cols[0].form_submit_button("Refresh suites", use_container_width=True):
+                _fetch_evaluator_job_detail.clear()
+                # Re-select this job so the full rerun shows this UI again with fresh data.
+                st.session_state["recent_eval_jobs_run_selected"] = job_id
+                st.rerun()
+            hint_cols[1].caption("No suite candidates were available yet for this job. Refresh to re-read suite data from the evaluator API.")
+
+        selected_suite_labels = st.multiselect(
+            "Suites to download (optional)",
+            options=suite_labels,
+            default=[],
+            help="Leave empty to download all suites from this evaluator job.",
+            disabled=not suite_labels,
+        )
+
+        run_phase = ""
+        run_skip_large_file = False
+        run_large_file_mb = 50.0
+        run_keep_zip_files = False
+        if run_download_type == "Archives (ZIP)":
+            run_phase = st.text_input(
+                "Phase to extract",
+                value=phase_default,
+                help="Enter the phase name to extract from archives.",
+            )
+            opt_cols = st.columns([1.2, 1.3, 1.2])
+            run_skip_large_file = opt_cols[0].checkbox(
+                "Skip large files",
+                value=skip_large_file_default,
+                help="Skip unusually large archives during download.",
+            )
+            run_large_file_mb = opt_cols[1].number_input(
+                "Skip threshold (MB)",
+                min_value=1.0,
+                max_value=5000.0,
+                step=1.0,
+                value=float(large_file_mb_default),
+            )
+            run_keep_zip_files = opt_cols[2].checkbox(
+                "Keep ZIP files",
+                value=keep_zip_files_default,
+                help="Keep downloaded ZIPs after extraction.",
+            )
+
+        run_cols = st.columns([1.25, 1.25, 1.1])
+        run_eval = run_cols[0].checkbox(
+            "Run evaluation",
+            value=True,
+            help="Run eval_result and generate Summary.csv / Score.csv after download.",
+        )
+        generate_parquet = run_cols[1].checkbox(
+            "Generate parquet",
+            value=CATALOG_IO_AVAILABLE,
+            disabled=not CATALOG_IO_AVAILABLE,
+            help="Build scene_result.parquet from .pkl files." if CATALOG_IO_AVAILABLE else "Install perception_catalog_analyzer to enable this.",
+        )
+        eval_recursive = run_cols[2].checkbox(
+            "Recursive eval",
+            value=True,
+            help="Search subdirectories for evaluation result folders.",
+        )
+
+        action_cols = st.columns([1.15, 1.15, 3.7])
+        cancel_clicked = action_cols[0].form_submit_button("Cancel", use_container_width=True)
+        start_clicked = action_cols[1].form_submit_button("Start", type="primary", use_container_width=True)
+
+    if cancel_clicked:
+        st.session_state.pop("recent_eval_jobs_run_selected", None)
+        st.rerun()
+
+    if not start_clicked:
+        return
+
+    resolved_output, path_err = resolve_under_data_root(run_output_path, allow_create=True)
+    if path_err:
+        st.error(f"Output path is invalid: {path_err}")
+        return
+
+    selected_suite_ids = [suite_label_to_id[label] for label in selected_suite_labels]
+    set_config_value("output_path", to_data_relative(resolved_output))
+    set_config_value("environment", environment)
+    set_config_value("project_id", project_id)
+    set_config_value("job_id", job_id)
+    set_config_value("suite_id", "")
+    set_config_value("suite_ids", selected_suite_ids)
+    set_config_value("download_type", run_download_type)
+    if run_download_type == "Archives (ZIP)":
+        set_config_value("phase", run_phase)
+        set_config_value("skip_large_file", run_skip_large_file)
+        set_config_value("large_file_mb", run_large_file_mb)
+        set_config_value("keep_zip_files", run_keep_zip_files)
+
+    params = {
+        "output_path": str(resolved_output),
+        "project_id": project_id,
+        "job_id": job_id,
+        "suite_id": "",
+        "suite_ids": selected_suite_ids or None,
+        "download_type": "archives" if run_download_type == "Archives (ZIP)" else "result_json",
+        "phase": run_phase if run_download_type == "Archives (ZIP)" else "",
+        "skip_large_file": run_skip_large_file if run_download_type == "Archives (ZIP)" else False,
+        "large_file_mb": run_large_file_mb if run_download_type == "Archives (ZIP)" else 50.0,
+        "keep_zip_files": run_keep_zip_files if run_download_type == "Archives (ZIP)" else False,
+        "run_eval": run_eval,
+        "generate_parquet": generate_parquet,
+        "eval_recursive": eval_recursive,
+        "eval_overwrite": False,
+        "eval_workers": _default_eval_workers(),
+    }
+    task_id = _enqueue_task("download_and_eval", params)
+    if not task_id:
+        st.error("Failed to enqueue task. Check REDIS_URL and DATABASE_URL.")
+        return
+
+    st.session_state["recent_eval_jobs_flash"] = (
+        f"Queued Download + Eval + Parquet for `{detail.get('title', job_id)}`. "
+        f"Task id: `{task_id}`."
+    )
+    st.session_state.pop("recent_eval_jobs_run_selected", None)
+    st.rerun()
+
+
+def _render_recent_evaluator_jobs_section(
+    project_id: str,
+    environment: str,
+    *,
+    output_path_default: str,
+    download_type_default: str,
+    phase_default: str,
+    skip_large_file_default: bool,
+    large_file_mb_default: float,
+    keep_zip_files_default: bool,
+) -> None:
+    """Render a direct evaluator-jobs browser above the download tabs.
+
+    The keyword-only ``*_default`` values are forwarded unchanged to the per-job run dialog.
+    """
+    _inject_recent_evaluator_jobs_styles()
+    show_section = st.toggle(
+        "Show recent evaluator jobs",
+        value=st.session_state.get("recent_eval_jobs_show", False),
+        key="recent_eval_jobs_show",
+        help="Load recent evaluator jobs only when you want to browse them.",
+    )
+    if not show_section:
+        return
+
+    st.subheader("Recent evaluator jobs")
+    st.caption("Compact browser for recent evaluator jobs. Select one job to inspect detailed suite and failed-case information.")
+    flash_message = st.session_state.pop("recent_eval_jobs_flash", None)
+    if flash_message:
+        st.success(flash_message)
+    user_directory = _get_recent_eval_user_directory()
+
+    control_cols = st.columns([0.75, 1.0, 1.15, 1.45, 1.25, 1.0, 1.0, 0.75])
+    with control_cols[0]:
+        st.markdown('<div class="evj-toolbar-note">Rows</div>', unsafe_allow_html=True)
+        limit = int(
+            st.selectbox(
+                "Rows",
+                options=[6, 12, 20, 30],
+                index=1,
+                key="recent_eval_jobs_limit",
+                help="How many recent evaluator jobs to fetch for this project.",
+                label_visibility="collapsed",
+            )
+        )
+    with control_cols[1]:
+        st.markdown('<div class="evj-toolbar-note">Status</div>', unsafe_allow_html=True)
+        status_filter = st.multiselect(
+            "Status",
+            options=["running", "success", "failed", "canceled", "unknown"],
+            default=[],
+            key="recent_eval_jobs_status_filter",
+            help="Leave empty to show all recent jobs.",
+            label_visibility="collapsed",
+            placeholder="All statuses",
+        )
+    with control_cols[2]:
+        st.markdown('<div class="evj-toolbar-note">Search In</div>', unsafe_allow_html=True)
+        search_scope = st.selectbox(
+            "Search in",
+            options=["Branch/tag", "Description", "Job ID", "Git SHA", "Fail message"],
+            index=0,
+            key="recent_eval_jobs_search_scope",
+            help="Choose which evaluator field the quick search should target.",
+            label_visibility="collapsed",
+        )
+    with control_cols[3]:
+        st.markdown('<div class="evj-toolbar-note">Search</div>', unsafe_allow_html=True)
+        search_text = st.text_input(
+            "Search",
+            key="recent_eval_jobs_search_text",  # value is restored from session state via the key
+            help="Server-side search across the selected field.",
+            label_visibility="collapsed",
+            placeholder="Type to search evaluator jobs",
+        ).strip()
+    recent_candidates = _get_recent_job_search_history(search_scope)
+    if recent_candidates:
+        recent_key = f"recent_eval_jobs_search_recent::{search_scope}"
+
+        def _apply_recent_search() -> None:
+            """Copy the picked recent search into the search box, then reset the picker."""
+            # Runs as a widget callback, i.e. before the search box is instantiated on the next
+            # run; assigning its key after instantiation raises StreamlitAPIException.
+            picked = st.session_state.get(recent_key, "")
+            if picked:
+                st.session_state["recent_eval_jobs_search_text"] = picked
+                st.session_state[recent_key] = ""
+
+        st.selectbox(
+            "Recent searches",
+            options=[""] + recent_candidates,
+            index=0,
+            key=recent_key,
+            help="Reuse a previously entered search for this field.",
+            on_change=_apply_recent_search,
+        )
+    user_candidates = sorted(
+        {
+            info.get("name", "").strip()
+            for info in user_directory.values()
+            if info.get("name", "").strip()
+        },
+        key=str.lower,
+    )
+    with control_cols[4]:
+        st.markdown('<div class="evj-toolbar-note">User</div>', unsafe_allow_html=True)
+        selected_user_name = st.selectbox(
+            "User",
+            options=[""] + user_candidates,
+            index=0,
+            key="recent_eval_jobs_user_filter",
+            help="Filter jobs by resolved scheduled user name.",
+            label_visibility="collapsed",
+        )
+    with control_cols[5]:
+        st.markdown('<div class="evj-toolbar-note">From</div>', unsafe_allow_html=True)
+        date_from = st.date_input(
+            "From",
+            value=st.session_state.get("recent_eval_jobs_date_from", None),
+            key="recent_eval_jobs_date_from",
+            label_visibility="collapsed",
+            help="Scheduled-at lower bound in JST.",
+        )
+    with control_cols[6]:
+        st.markdown('<div class="evj-toolbar-note">To</div>', unsafe_allow_html=True)
+        date_to = st.date_input(
+            "To",
+            value=st.session_state.get("recent_eval_jobs_date_to", None),
+            key="recent_eval_jobs_date_to",
+            label_visibility="collapsed",
+            help="Scheduled-at upper bound in JST.",
+        )
+    with control_cols[7]:
+        st.markdown('<div class="evj-toolbar-note">Actions</div>', unsafe_allow_html=True)
+        if st.button("Refresh", key="refresh_recent_eval_jobs", use_container_width=True):
+            _fetch_recent_evaluator_job_pages.clear()
+            _fetch_evaluator_job_detail.clear()
+            st.rerun()
+
+    page_key = "recent_eval_jobs_page"
+    st.session_state.setdefault(page_key, 1)
+    if date_from and date_to and date_from > date_to:
+        st.warning("`From` date must be earlier than or equal to `To` date.")
+        return
+
+    def _render_job_list() -> None:
+        nonlocal user_directory
+        if not project_id:
+            st.info("Enter a project id in the sidebar to browse recent evaluator jobs.")
+            return
+        current_page = max(1, int(st.session_state.get(page_key, 1)))
+        pages_to_fetch = max(3, current_page + 2)
+        if search_text or status_filter or date_from or date_to or selected_user_name:
+            pages_to_fetch = max(pages_to_fetch, 6)
+        server_status_values = tuple(_status_filter_values(status_filter))
+        server_search_filter, search_needle = _build_recent_job_search_filter(search_text, search_scope, user_directory)
+        selected_user_ids = sorted(
+            {
+                subject_id
+                for subject_id, info in user_directory.items()
+                if selected_user_name
+                and selected_user_name.lower() == str(info.get("name") or "").strip().lower()
+            }
+        )
+        server_date_filters = _build_recent_job_date_filters(date_from, date_to)
+        extra_filters: List[Dict[str, Any]] = []
+        if server_search_filter:
+            extra_filters.append(server_search_filter)
+        if selected_user_ids:
+            extra_filters.append(
+                {
+                    "field": "scheduled_by",
+                    "operator": "In",
+                    "values": selected_user_ids,
+                }
+            )
+        extra_filters.extend(server_date_filters)
+        extra_filter_tuples = tuple(
+            (
+                str(f["field"]),
+                str(f["operator"]),
+                tuple(f.get("values", []) or []),
+            )
+            for f in extra_filters
+        )
+        try:
+            fetched_pages = _fetch_recent_evaluator_job_pages(
+                project_id,
+                environment,
+                limit,
+                pages_to_fetch,
+                status_values=server_status_values,
+                extra_filters=extra_filter_tuples,
+            )
+        except Exception as e:
+            st.error(f"Could not fetch recent evaluator jobs: {e}")
+            return
+        if search_text:
+            _save_recent_job_search_history(search_scope, search_text)
+
+        jobs = [job for page in fetched_pages for job in page.get("jobs", [])]
+        user_directory = _hydrate_recent_eval_user_directory(jobs, environment)
+        has_more_from_api = bool(fetched_pages and fetched_pages[-1].get("next_token"))
+
+        if search_needle:
+            if search_scope == "Branch/tag":
+                jobs = [job for job in jobs if search_needle in str(job.get("target", "")).lower()]
+            elif search_scope == "Description":
+                jobs = [job for job in jobs if search_needle in str(job.get("description", "")).lower() or search_needle in str(job.get("title", "")).lower()]
+            elif search_scope == "Job ID":
+                jobs = [job for job in jobs if search_needle in str(job.get("job_id", "")).lower()]
+            elif search_scope == "Git SHA":
+                jobs = [job for job in jobs if search_needle in str(job.get("git_sha", "")).lower()]
+            elif search_scope == "Fail message":
+                jobs = [job for job in jobs if search_needle in str(job.get("fail_message", "")).lower()]
+        if selected_user_name:
+            selected_lower = selected_user_name.lower()
+            jobs = [
+                job for job in jobs
+                if selected_lower == str((user_directory.get(str(job.get("scheduled_by") or "").strip(), {}) or {}).get("name", "")).strip().lower()
+            ]
+        if status_filter:
+            selected = {evaluator_api.normalize_job_status(v) for v in status_filter}
+            jobs = [job for job in jobs if job.get("status_variant") in selected or evaluator_api.normalize_job_status(job.get("status", "")) in selected]
+
+        if not jobs:
+            st.session_state[page_key] = 1
+            st.markdown('<div class="evj-empty">No recent evaluator jobs matched the current filters.</div>', unsafe_allow_html=True)
+            return
+
+        total_loaded = len(jobs)
+        has_next_page = total_loaded > current_page * limit or has_more_from_api
+        max_known_page = max(1, (total_loaded + limit - 1) // limit)
+        if current_page > max_known_page:
+            current_page = max_known_page
+            st.session_state[page_key] = current_page
+        start_idx = (current_page - 1) * limit
+        end_idx = start_idx + limit
+        visible_jobs = jobs[start_idx:end_idx]  # never empty: the clamp keeps start_idx < total_loaded
+
+        if current_page == 1:
+            page_numbers = list(range(1, min(3, max_known_page) + 1))
+        else:
+            page_numbers = list(
+                range(
+                    max(1, current_page - 1),
+                    min(max_known_page, current_page + 1) + 1,
+                )
+            )
+        pager_cols = st.columns([0.8, 0.9, 0.9, 0.9, 0.8, 5.7])
+        if pager_cols[0].button("‹", key="recent_eval_jobs_prev", use_container_width=True, disabled=current_page <= 1):
+            st.session_state[page_key] = max(1, current_page - 1)
+            st.rerun()
+        for idx, page_num in enumerate(page_numbers[:3], start=1):
+            with pager_cols[idx]:
+                btn_key = (
+                    f"recent_eval_jobs_pagebtn_active_{page_num}"
+                    if page_num == current_page
+                    else f"recent_eval_jobs_pagebtn_{page_num}"
+                )
+                if st.button(
+                    str(page_num),
+                    key=btn_key,
+                    use_container_width=True,
+                    disabled=page_num == current_page,
+                ):
+                    st.session_state[page_key] = page_num
+                    st.rerun()
+        if pager_cols[4].button("›", key="recent_eval_jobs_next", use_container_width=True, disabled=not has_next_page):
+            st.session_state[page_key] = current_page + 1
+            st.rerun()
+
+        selected_job_id = st.session_state.get("recent_eval_jobs_selected")
+        if selected_job_id and not any(str(job.get("job_id", "")) == str(selected_job_id) for job in jobs):
+            st.session_state.pop("recent_eval_jobs_selected", None)
+            selected_job_id = None
+
+        selected_run_job_id = st.session_state.get("recent_eval_jobs_run_selected")
+        if selected_run_job_id and not any(str(job.get("job_id", "")) == str(selected_run_job_id) for job in jobs):
+            st.session_state.pop("recent_eval_jobs_run_selected", None)
+            selected_run_job_id = None
+
+        st.markdown('<div class="evj-list">', unsafe_allow_html=True)
+        for job in visible_jobs:
+            subject_id = str(job.get("scheduled_by") or "").strip()
+            user_info = user_directory.get(subject_id, {})
+            user_label = str(user_info.get("name") or subject_id or "Unknown").strip()
+            row_cols = st.columns([9.8, 2.0])
+            with row_cols[0]:
+                _render_recent_evaluator_job_card(job, user_label=user_label)
+            with row_cols[1]:
+                action_cols = st.columns([1.0, 1.0], gap="small")
+                with action_cols[0]:
+                    if st.button("Details", key=f"recent_eval_view_{job['job_id']}", use_container_width=True):
+                        st.session_state["recent_eval_jobs_selected"] = str(job["job_id"])
+                        _fetch_evaluator_job_detail.clear()
+                        st.rerun()
+                with action_cols[1]:
+                    if st.button("Run", key=f"recent_eval_run_{job['job_id']}", use_container_width=True):
+                        st.session_state["recent_eval_jobs_run_selected"] = str(job["job_id"])
+                        _fetch_evaluator_job_detail.clear()
+                        st.rerun()
+        st.markdown("</div>", unsafe_allow_html=True)
+
+        selected_job_id = st.session_state.get("recent_eval_jobs_selected")
+        if selected_job_id:
+            selected_job = next((job for job in jobs if str(job.get("job_id", "")) == str(selected_job_id)), None)
+            if selected_job:
+                if callable(getattr(st, "dialog", None)):
+                    try:
+                        @st.dialog(f"Job details · {selected_job.get('title', '—')}", width="large")
+                        def _recent_eval_job_dialog() -> None:
+                            _render_recent_evaluator_job_detail(project_id, environment, selected_job)
+                            if st.button("Close", key="recent_eval_jobs_close_detail", use_container_width=True):
+                                st.session_state.pop("recent_eval_jobs_selected", None)
+                                st.rerun()
+
+                        _recent_eval_job_dialog()
+                    finally:  # one-shot: forget the selection once the dialog call has returned
+                        st.session_state.pop("recent_eval_jobs_selected", None)
+                else:
+                    st.markdown('<div class="evj-detail">', unsafe_allow_html=True)
+                    hdr_cols = st.columns([4.4, 1.1])
+                    with hdr_cols[0]:
+                        st.subheader(f"Job details · {selected_job.get('title', '—')}")
+                    with hdr_cols[1]:
+                        if st.button("Close", key="recent_eval_jobs_close_detail_fallback", use_container_width=True):
+                            st.session_state.pop("recent_eval_jobs_selected", None)
+                            st.rerun()
+                    _render_recent_evaluator_job_detail(project_id, environment, selected_job)
+                    st.markdown("</div>", unsafe_allow_html=True)
+
+        selected_run_job_id = st.session_state.get("recent_eval_jobs_run_selected")
+        if selected_run_job_id:
+            selected_run_job = next((job for job in jobs if str(job.get("job_id", "")) == str(selected_run_job_id)), None)
+            if selected_run_job:
+                if callable(getattr(st, "dialog", None)):
+                    try:
+                        @st.dialog(f"Download + Eval + Parquet · {selected_run_job.get('title', '—')}", width="large")
+                        def _recent_eval_run_dialog() -> None:
+                            _render_recent_evaluator_job_run_dialog(
+                                project_id,
+                                environment,
+                                selected_run_job,
+                                output_path_default=output_path_default,
+                                download_type_default=download_type_default,
+                                phase_default=phase_default,
+                                skip_large_file_default=skip_large_file_default,
+                                large_file_mb_default=large_file_mb_default,
+                                keep_zip_files_default=keep_zip_files_default,
+                            )
+
+                        _recent_eval_run_dialog()
+                    finally:
+                        if st.session_state.get("recent_eval_jobs_run_selected") == str(selected_run_job_id):
+                            st.session_state.pop("recent_eval_jobs_run_selected", None)
+                else:
+                    st.markdown('<div class="evj-detail">', unsafe_allow_html=True)
+                    hdr_cols = st.columns([4.4, 1.1])
+                    with hdr_cols[0]:
+                        st.subheader(f"Download + Eval + Parquet · {selected_run_job.get('title', '—')}")
+                    with hdr_cols[1]:
+                        if st.button("Close", key="recent_eval_jobs_close_run_fallback", use_container_width=True):
+                            st.session_state.pop("recent_eval_jobs_run_selected", None)
+                            st.rerun()
+                    _render_recent_evaluator_job_run_dialog(
+                        project_id,
+                        environment,
+                        selected_run_job,
+                        output_path_default=output_path_default,
+                        download_type_default=download_type_default,
+                        phase_default=phase_default,
+                        skip_large_file_default=skip_large_file_default,
+                        large_file_mb_default=large_file_mb_default,
+                        keep_zip_files_default=keep_zip_files_default,
+                    )
+                    st.markdown("</div>", unsafe_allow_html=True)
+
+    _render_job_list()
+
+
 # Sidebar for configuration
 with st.sidebar:
     st.header("Configuration")
@@ -1330,7 +2690,6 @@ def on_suite_id_change():
         skip_large_file = False
         large_file_mb = 50.0  # Doesn't apply
 
-
 st.markdown('<p class="dl-tabs-rail">Pick a workflow</p>', unsafe_allow_html=True)
 tab1, tab2, tab3, tab4 = st.tabs(
     ["📥 Download Results", "🗺️ Download Scenarios", "📊 View Downloads", "🧮 Eval Results"]
@@ -1518,6 +2877,188 @@ def on_suite_id_change():
             st.error(f"❌ Error: {str(e)}")
             st.exception(e)
 
+    # === Combined Download + Eval + Parquet Button ===
+    st.divider()
+    st.subheader("🚀 Combined Workflow: Download + Eval + Parquet")
+    st.caption("Download results, run evaluation, and generate parquet in one click. Eval only runs if download succeeds.")
+
+    # Options for combined workflow
+    col_combo1, col_combo2 = st.columns(2)
+    with col_combo1:
+        combined_run_eval = st.checkbox(
+            "Run evaluation (eval_result + Summary/Score CSV)",
+            value=True,
+            key="combined_run_eval",
+            help="Run eval_result on downloaded directories and generate Summary.csv/Score.csv",
+        )
+    with col_combo2:
+        combined_generate_parquet = st.checkbox(
+            "Generate parquet",
+            value=CATALOG_IO_AVAILABLE,
+            key="combined_generate_parquet",
+            help="Build scene_result.parquet from .pkl files" if CATALOG_IO_AVAILABLE else "Install perception_catalog_analyzer to enable",
+            disabled=not CATALOG_IO_AVAILABLE,
+        )
+
+    combined_eval_recursive = st.checkbox(
+        "Search subdirectories for eval",
+        value=True,
+        key="combined_eval_recursive",
+        help="Recursively search for result directories",
+    )
+
+    if st.button("📥 Download + Eval + Parquet", type="primary", key="download_and_eval_btn"):
+        st.session_state.stop_downloads = False
+        if not all([project_id, st.session_state.job_id]):
+            st.error("Please fill in all required fields: Project ID and Job ID")
+            st.stop()
+        resolved_output, path_err = resolve_under_data_root(output_path, allow_create=True)
+        if path_err:
+            st.error(f"Output path is invalid: {path_err}. Use a path under the server data root.")
+            st.stop()
+        resolved_path_str = str(resolved_output)
+        set_config_value("output_path", to_data_relative(resolved_output))
+        set_config_value("environment", environment)
+        set_config_value("project_id", project_id)
+        set_config_value("job_id", st.session_state.job_id)
+        set_config_value("suite_id", suite_id)
+        set_config_value("suite_ids", selected_suite_ids)
+        set_config_value("download_type", download_type)
+        if download_type == "Archives (ZIP)":
+            set_config_value("phase", phase)
+            set_config_value("skip_large_file", skip_large_file)
+            set_config_value("large_file_mb", large_file_mb)
+            set_config_value("keep_zip_files", keep_zip_files)
+
+        if is_task_queue_enabled():
+            # Enqueue combined task
+            params = {
+                "output_path": resolved_path_str,
+                "project_id": project_id,
+                "job_id": st.session_state.job_id,
+                "suite_id": suite_id or "",
+                "suite_ids": selected_suite_ids or None,
+                "download_type": "archives" if download_type == "Archives (ZIP)" else "result_json",
+                "phase": phase if download_type == "Archives (ZIP)" else "",
+                "skip_large_file": skip_large_file,
+                "large_file_mb": large_file_mb,
+                "keep_zip_files": keep_zip_files,
+                "run_eval": combined_run_eval,
+                "generate_parquet": combined_generate_parquet,
+                "eval_recursive": combined_eval_recursive,
+                "eval_overwrite": False,
+                "eval_workers": _default_eval_workers(),
+            }
+            task_id = _enqueue_task("download_and_eval", params)
+            if task_id:
+                st.success("Combined task queued. It will appear in the **Task status** section below; the list updates automatically.")
+                st.info("The task will: 1) Download results → 2) Run eval (if download succeeds) → 3) Generate parquet (if download succeeds)")
+            else:
+                st.error("Failed to enqueue task. Check REDIS_URL and DATABASE_URL.")
+            # The task was handed to the queue (or failed to enqueue): stop so the inline path is skipped.
+            st.stop()
+
+        # Inline execution (non-task-queue mode)
+        os.makedirs(resolved_path_str, exist_ok=True)
+        try:
+            job_result = JobResult(
+                environment=environment,
+                project_id=project_id,
+                job_id=st.session_state.job_id,
+                suite_id=suite_id,
+                suite_ids=selected_suite_ids,
+                output_path=resolved_path_str,
+            )
+
+            # Placeholder that is overwritten with the banner of the step currently running.
+            progress_placeholder = st.empty()
+
+            # Step 1: Download
+            # A download counts as successful only when it yields at least one item; the
+            # per-type success banner is shown only then, so an empty result reads as a failure.
+            progress_placeholder.info("📥 Step 1/3: Downloading results...")
+            download_successful = False
+            if download_type == "Archives (ZIP)":
+                with st.expander("Downloading Archives", expanded=True):
+                    remain_list = job_result.download_archive_and_unzip(
+                        phase,
+                        skip_large_file=skip_large_file,
+                        large_file_mb=large_file_mb,
+                        keep_zip_files=keep_zip_files,
+                    )
+                    download_successful = len(remain_list) > 0
+                    if download_successful:
+                        st.success(f"✅ Downloaded and extracted {len(remain_list)} archives")
+            else:
+                with st.expander("Downloading Result JSON", expanded=True):
+                    log_dicts = job_result.download_result_json()
+                    download_successful = len(log_dicts) > 0
+                    if download_successful:
+                        st.success(f"✅ Downloaded {len(log_dicts)} JSON files")
+
+            if not download_successful:
+                st.error("❌ Download failed. Cannot continue with evaluation.")
+                st.stop()
+
+            # Step 2: Run eval
+            if combined_run_eval:
+                progress_placeholder.info("🧮 Step 2/3: Running evaluation...")
+                target_dirs = find_eval_result_dirs(resolved_path_str, recursive=combined_eval_recursive)
+                if target_dirs:
+                    eval_results = []
+                    eval_progress = st.progress(0)
+                    for done, result_dir in enumerate(target_dirs, start=1):
+                        eval_progress.progress(done / len(target_dirs), f"Evaluating {done}/{len(target_dirs)}")
+                        eval_results.append(run_eval_result_for_dir(result_dir, overwrite=False))
+                    eval_progress.empty()
+
+                    success_eval = sum(1 for r in eval_results if r["status"] == "success")
+                    failed_eval = sum(1 for r in eval_results if r["status"] == "failed")
+                    st.success(f"✅ Eval complete: {success_eval} success, {failed_eval} failed")
+
+                    # Generate summary CSVs
+                    with st.spinner("Generating Summary.csv and Score.csv..."):
+                        csv_info = generate_summary_and_score_csv(resolved_path_str)
+                    st.success(f"Generated Summary.csv ({csv_info['summary_rows']} rows) and Score.csv ({csv_info['score_rows']} rows)")
+                else:
+                    st.warning("⚠️ No eval result directories found")
+
+            # Step 3: Generate parquet
+            if combined_generate_parquet and CATALOG_IO_AVAILABLE:
+                progress_placeholder.info("📦 Step 3/3: Generating parquet...")
+                pkl_dir = Path(resolved_path_str)
+                all_pkl_files = list(pkl_dir.rglob("*.pkl")) + list(pkl_dir.rglob("*.pkl.z"))
+                pkl_count = len(all_pkl_files)
+                if pkl_count > 0:
+                    with st.spinner(f"Processing {pkl_count} pkl files..."):
+                        parquet_path = pkl_archive_to_parquet(
+                            pkl_dir,
+                            on_progress=None,
+                            on_skip=None,
+                            project_id=project_id,
+                            job_id=st.session_state.job_id,
+                        )
+                    st.success(f"✅ Parquet generated: {parquet_path}")
+                else:
+                    st.warning("⚠️ No .pkl files found for parquet generation")
+
+            progress_placeholder.empty()
+            st.success("🎉 Combined workflow complete!")
+
+            # Show file tree
+            with st.expander("📁 File Structure"):
+                for root, dirs, files in os.walk(resolved_path_str):
+                    level = root.replace(resolved_path_str, '').count(os.sep)
+                    indent = ' ' * 4 * level
+                    st.text(f"{indent}{os.path.basename(root)}/")
+                    subindent = ' ' * 4 * (level + 1)
+                    for file in files:
+                        st.text(f"{subindent}{file}")
+
+        except Exception as e:
+            st.error(f"❌ Error: {str(e)}")
+            st.exception(e)
+
     # Information section
     with st.expander("ℹ️ How to use"):
         st.markdown("""
@@ -1753,7 +3294,7 @@ def on_suite_id_change():
                         st.error(f"Failed to save: {e}")
                         st.exception(e)
 
-    col1, col2, col3 = st.columns(3)
+    col1, col2 = st.columns(2)
     with col1:
         eval_recursive = st.checkbox(
             "Search subdirectories",
@@ -1766,28 +3307,10 @@ def on_suite_id_change():
             value=get_config_value("eval_overwrite", False),
             help="If unchecked, directories with result.txt will be skipped",
         )
-    with col3:
-        eval_parallel = st.checkbox(
-            "Run in parallel",
-            value=get_config_value("eval_parallel", False),
-            help="Temporarily disabled. Parallel execution currently provides no measurable benefit.",    
-            disabled=True
-        )
-        if eval_parallel:
-            eval_workers = st.number_input(
-                "Eval worker threads",
-                min_value=1,
-                max_value=16,
-                value=get_config_value("eval_workers", 1),
-                help="Number of parallel threads used to run eval_result",
-            )   
-            set_config_value("eval_workers", eval_workers)
-        else:
-            eval_workers = 1
-            set_config_value("eval_workers", eval_workers)
+    eval_workers = _default_eval_workers()
+    set_config_value("eval_workers", eval_workers)
     set_config_value("eval_recursive", eval_recursive)
     set_config_value("eval_overwrite", eval_overwrite)
-    set_config_value("eval_parallel", eval_parallel)
 
     # New option: Only generate summary/score csv
     only_generate_summary = st.checkbox(
@@ -1885,6 +3408,7 @@ def _emit_eval_finished_notification(message: str):
                         "eval_root": eval_path,
                         "recursive": eval_recursive,
                         "overwrite": eval_overwrite,
+                        "eval_workers": eval_workers,
                     })
                 if tid:
                     enqueued.append(f"{'generate_summary_csv' if only_generate_summary else 'run_eval_dirs'} ({tid[:8]}...)")
@@ -2046,30 +3570,23 @@ def _update_progress_status(done: int, total_dirs: int):
                 )
 
             try:
-                # sequential evaluation
-                if not eval_parallel:
-                    for i, result_dir in enumerate(target_dirs):
-                        _update_progress_status(i, total)
-                        results.append(run_eval_result_for_dir(result_dir, overwrite=eval_overwrite))
-                        _update_progress_status(i + 1, total)
-                else:
-                    max_workers = max(1, min(int(eval_workers), len(target_dirs)))
-                    with ThreadPoolExecutor(max_workers=max_workers) as executor:
-                        future_map = {
-                            executor.submit(_run_eval_result_worker, result_dir, eval_overwrite): result_dir
-                            for result_dir in target_dirs
-                        }
-                        completed = 0
-                        for future in as_completed(future_map):
-                            completed += 1
-                            _update_progress_status(completed, total)
-                            try:
-                                results.append(future.result())
-                            except Exception as e:
-                                result_dir = future_map.get(future, "unknown")
-                                results.append(
-                                    {"path": result_dir, "status": "failed", "detail": str(e)}
-                                )
+                max_workers = max(1, min(int(eval_workers), len(target_dirs)))
+                with ThreadPoolExecutor(max_workers=max_workers) as executor:
+                    future_map = {
+                        executor.submit(_run_eval_result_worker, result_dir, eval_overwrite): result_dir
+                        for result_dir in target_dirs
+                    }
+                    completed = 0
+                    for future in as_completed(future_map):
+                        completed += 1
+                        _update_progress_status(completed, total)
+                        try:
+                            results.append(future.result())
+                        except Exception as e:
+                            result_dir = future_map.get(future, "unknown")
+                            results.append(
+                                {"path": result_dir, "status": "failed", "detail": str(e)}
+                            )
 
                 _update_progress_status(total, total)
             finally:
@@ -2106,4 +3623,4 @@ def _update_progress_status(done: int, total_dirs: int):
                     if notify_when_done:
                         _emit_eval_finished_notification(
                             f"Eval run finished with CSV error. Success: {success_count}, Skipped: {skipped_count}, Failed: {failed_count}. {e}"
-                        )
\ No newline at end of file
+                        )
diff --git a/evaluation_dashboard_app/pages/6_Workflow.py b/evaluation_dashboard_app/pages/6_Workflow.py
new file mode 100644
index 0000000..33e9816
--- /dev/null
+++ b/evaluation_dashboard_app/pages/6_Workflow.py
@@ -0,0 +1,2548 @@
+"""
+Evaluator Workflow page:
+- browse finished local runs and launch compare views
+- monitor server-side tasks
+- start new evaluator pipelines
+- run download/eval from existing evaluator jobs
+"""
+
+from __future__ import annotations
+
+import html
+import io
+import json
+import os
+import re
+import urllib.parse
+import zipfile
+from datetime import datetime, timedelta, timezone
+from pathlib import Path
+from typing import Dict, List, Optional
+
+import streamlit as st
+import requests
+
+from lib.db import count_recent_tasks, create_task, is_task_queue_enabled, list_recent_tasks, update_task_rq_job_id
+from lib.page_chrome import (
+    inject_app_page_styles,
+    render_page_hero,
+    section_header,
+)
+from lib.path_utils import (
+    delete_run,
+    format_size,
+    get_data_root_display,
+    get_run_info,
+    list_run_directories,
+    resolve_run_subdirectory,
+    resolve_under_data_root,
+)
+from lib.run_metadata import (
+    build_run_search_blob,
+    read_run_metadata,
+    upsert_run_metadata,
+)
+from lib.specsheet_report import (
+    DEFAULT_TREND_TOPIC,
+    DETECTION_TREND_TOPIC_BY_MODEL,
+    parse_trend_metadata_text,
+)
+from lib.ui.recent_evaluator_jobs import (
+    _fetch_evaluator_job_detail,
+    _format_source_ref_html,
+    _format_source_ref_text,
+    _render_recent_evaluator_job_retest_dialog,
+    _render_recent_evaluator_jobs_section,
+    configure_recent_evaluator_jobs_ui,
+)
+from lib.ui.task_history import get_task_list_current_user, render_task_list
+from lib.ui.styles_download import inject_download_page_styles
+from lib.user_config import UserConfig
+
+try:
+    from lib.perception_catalog_io import pkl_archive_to_parquet
+
+    CATALOG_IO_AVAILABLE = True
+except ImportError:
+    CATALOG_IO_AVAILABLE = False
+
+_JST = timezone(timedelta(hours=9))
+_TASK_LIST_MAX_ROWS = 200
+_TASK_LIST_SINCE_DAYS = 7
+_RELEASE_PERFORMANCE_CATALOG_ID = "e36d75b9-6c3a-4970-9b9b-5cd13f7a9da3"
+_RELEASE_PERFORMANCE_INTEGRATION_ID = "96ad8fba-0228-4c2b-9166-07d4de1a0760"
+_RELEASE_DEVOPS_CATALOG_ID = "ab0f8498-cc1b-4726-836f-e18e8bcb3200"
+_RELEASE_DEVOPS_INTEGRATION_ID = "295cff78-9bc9-4d60-b7aa-f95be6ff96a4"
+_RELEASE_OPTIONAL_CATALOG_ID = "09039022-ec91-41bf-9e93-fdefccdfc9bc"
+_RELEASE_SKIP_LARGE_FILE = True
+_RELEASE_LARGE_FILE_MB = 50.0
+_RELEASE_TREND_TOPIC_OPTIONS = {
+    "Prediction / object recognition": DEFAULT_TREND_TOPIC,
+    "ML model / CenterPoint": DETECTION_TREND_TOPIC_BY_MODEL["centerpoint"],
+    "ML model / BEVFusion": DETECTION_TREND_TOPIC_BY_MODEL["bevfusion"],
+    "Custom": "",
+}
+_TASK_HISTORY_RANGE_OPTIONS = {
+    "7 days": 7,
+    "30 days": 30,
+    "90 days": 90,
+    "All": None,
+}
+
+
+st.set_page_config(
+    page_title="Evaluator Workflow",
+    layout="wide",
+    initial_sidebar_state="collapsed",
+)
+inject_app_page_styles()
+inject_download_page_styles()
+
+
+_user_config = UserConfig(warning_fn=st.warning)
+
+
+def get_config_value(key: str, default=None):
+    return _user_config.get(key, default)
+
+
+def set_config_value(key: str, value) -> None:
+    _user_config.set(key, value)
+
+
+def _to_jst(dt):
+    if dt is None:
+        return None
+    try:
+        if getattr(dt, "tzinfo", None) is None:
+            dt = dt.replace(tzinfo=timezone.utc)
+        return dt.astimezone(_JST)
+    except Exception:
+        return None
+
+
+def _load_catalog_presets():
+    app_root = Path(__file__).parent.parent
+    catalogs_filename = "catalogs.json"
+    search_paths = [
+        app_root / catalogs_filename,
+        Path(os.environ.get("CATALOGS_PATH", "")),
+        Path.cwd() / catalogs_filename,
+    ]
+    catalogs = []
+    loaded_path = None
+    load_error = None
+    for path in search_paths:
+        if path.exists() and path.is_file():
+            try:
+                with open(path, "r", encoding="utf-8") as handle:
+                    data = json.load(handle)
+                if isinstance(data, dict):
+                    catalogs = data.get("catalogs", [])
+                elif isinstance(data, list):
+                    catalogs = data
+                else:
+                    catalogs = []
+                loaded_path = str(path)
+                load_error = None
+                break
+            except Exception as exc:
+                load_error = str(exc)
+    presets = []
+    for item in catalogs:
+        if not isinstance(item, dict):
+            continue
+        display_name = item.get("display_name") or item.get("name") or item.get("catalog_id", "Unknown")
+        presets.append({**item, "display_name": display_name})
+    return presets, loaded_path, load_error
+
+
+def _fetch_server_catalogs(project_id: str, environment: str) -> List[Dict[str, str]]:
+    """Return the project's catalogs as id/name/description dicts sorted by display name ([] without a project)."""
+    if not project_id:
+        return []
+    from lib.WebAPI import catalogAPI
+
+    # AUTH_PROFILE is written process-wide before the client is built; a blank environment maps to "default".
+    os.environ["AUTH_PROFILE"] = environment or "default"
+    response = catalogAPI(project_id=project_id).list_catalogs()
+    response.raise_for_status()
+    data = response.json()
+    raw_catalogs = data.get("catalogs", []) if isinstance(data, dict) else data
+    options: List[Dict[str, str]] = []
+    for item in raw_catalogs or []:
+        if not isinstance(item, dict):
+            continue
+        catalog_id = str(item.get("id") or item.get("catalog_id") or "").strip()
+        display_name = str(item.get("display_name") or item.get("name") or catalog_id).strip()
+        if not catalog_id or not display_name:
+            continue
+        options.append(
+            {
+                "catalog_id": catalog_id,
+                "display_name": display_name,
+                "description": str(item.get("description") or "").strip(),
+            }
+        )
+    options.sort(key=lambda item: item["display_name"].lower())
+    return options
+
+
+def _resolve_integration_id_for_catalog(project_id: str, environment: str, catalog_id: str) -> str:
+    """Resolve the most relevant active integration for a catalog."""
+    if not project_id or not catalog_id:
+        return ""
+    from lib import evaluator_api
+
+    os.environ["AUTH_PROFILE"] = environment or "default"
+    api = evaluator_api.EvaluationRunAPI()
+    url = f"{api.api_base_url}/projects/{project_id}/integrations"
+    response = api.request(url, {"catalog_id": catalog_id, "size": 100}, method="GET")
+    if response is None:
+        raise RuntimeError("No response returned while loading integrations.")
+    if response.status_code != 200:
+        raise RuntimeError(f"Failed to load integrations: status={response.status_code}")
+
+    payload = json.loads(response.content)
+    integrations = payload.get("integrations", []) or []
+    active = [
+        item for item in integrations
+        if isinstance(item, dict)
+        and str(item.get("catalog_id") or "").strip() == catalog_id
+        and not bool(item.get("deleted"))
+    ]
+    if not active:
+        raise RuntimeError("No active integration was found for the selected catalog.")
+
+    def _sort_key(item: Dict[str, object]) -> tuple:
+        return (
+            str(item.get("updated_at") or ""),
+            int(item.get("version_id") or 0),
+            str(item.get("id") or ""),
+        )
+
+    active.sort(key=_sort_key, reverse=True)
+    return str(active[0].get("id") or "").strip()
+
+
+def _enqueue_task(task_type: str, params: dict) -> Optional[str]:
+    try:
+        session_id = get_task_list_current_user()
+        task_id = create_task(task_type, params, session_id=session_id)
+        if not task_id:
+            return None
+
+        from redis import Redis
+        from rq import Queue
+        from worker.tasks import run_job
+
+        redis_url = os.environ.get("REDIS_URL", "redis://localhost:6379")
+        redis_conn = Redis.from_url(redis_url)
+        queue = Queue(
+            name=os.environ.get("RQ_QUEUE", "default"),
+            connection=redis_conn,
+            default_timeout="7d",
+        )
+        job = queue.enqueue(
+            run_job,
+            task_id,
+            task_type,
+            params,
+            job_timeout="7d",
+            result_ttl="7d",
+        )
+        rq_id = getattr(job, "id", None)
+        if rq_id:
+            update_task_rq_job_id(task_id, str(rq_id))
+        return task_id
+    except Exception as exc:
+        st.error(f"Failed to enqueue task: {exc}")
+        return None
+
+
+def _make_default_output_path(branch_name: str) -> str:
+    """Return ``eval_<branch>_<YYYYmmdd_HHMMSS>`` (local time) with non-word branch characters as underscores."""
+    # An empty branch name falls back to "eval"; runs of underscores collapse to a single one.
+    clean_branch = re.sub(r"[^\w]", "_", branch_name.strip("/")) if branch_name else "eval"
+    clean_branch = re.sub(r"_+", "_", clean_branch).strip("_")
+    ts = datetime.now().strftime("%Y%m%d_%H%M%S")
+    return f"eval_{clean_branch}_{ts}"
+
+
+def _safe_output_part(value: object, fallback: str) -> str:
+    text = re.sub(r"[^\w.\-]+", "_", str(value or "").strip()).strip("._")
+    return text or fallback
+
+
+def _catalog_preset_emoji(preset_name: str, *, has_custom_catalog: bool = False) -> str:
+    mapping = {
+        "Build Test Catalog": "🛠️",
+        "Performance Test": "📈",
+        "Old performance test": "🕰️",
+        "Devops Test": "⚙️",
+        "Usecase Performance Catalog": "🧭",
+        "L4 regression test": "⚠️",
+    }
+    normalized = str(preset_name or "").strip()
+    if normalized in mapping:
+        return mapping[normalized]
+    if has_custom_catalog:
+        return "🧩"
+    return "📦"
+
+
+def _make_auto_workflow_description(
+    target_name: str,
+    preset_name: str = "",
+    *,
+    has_custom_catalog: bool = False,
+) -> str:
+    """Return the default workflow description: target, local ``MM-DD HH:MM`` stamp and catalog emoji."""
+    # A blank target is shown as "default"; whitespace runs inside the target collapse to one space.
+    clean_target = str(target_name or "").strip() or "default"
+    clean_target = re.sub(r"\s+", " ", clean_target)
+    stamp = datetime.now().strftime("%m-%d %H:%M")
+    return (
+        f"🚀 evaluator workflow [{clean_target}] [{stamp}] "
+        f"{_catalog_preset_emoji(preset_name, has_custom_catalog=has_custom_catalog)}"
+    )
+
+
+def _make_auto_release_workflow_description(target_name: str) -> str:
+    clean_target = str(target_name or "").strip() or "default"
+    clean_target = re.sub(r"\s+", " ", clean_target)
+    stamp = datetime.now().strftime("%m-%d %H:%M")
+    return f"🚀 release workflow [{clean_target}] [{stamp}]"
+
+
+def _make_default_release_pilot_auto_version(target_name: str) -> str:
+    target = str(target_name or "").strip()
+    match = re.search(r"v?(\d+\.\d+\.\d+)", target)
+    if match:
+        return f"Pilot.Auto v{match.group(1)}"
+    return f"Pilot.Auto {target}" if target else "Pilot.Auto release"
+
+
+def _make_default_release_metadata_text(target_name: str) -> str:
+    release_group = _safe_output_part(target_name, "release")
+    pilot_auto_version = _make_default_release_pilot_auto_version(target_name)
+    description = f"{target_name} release data update" if target_name else "Release data update"
+    date = datetime.now(_JST).strftime("%Y.%m.%d")
+    return (
+        "tags: [trend]\n"
+        f"release_group: {release_group}\n"
+        f'pilot_auto_version: "{pilot_auto_version}"\n'
+        f"version_abbr: {_safe_output_part(pilot_auto_version.replace('Pilot.Auto', '').strip(), 'release')[:16]}\n"
+        "data_count: 99,776+\n"
+        f"description: {description}\n"
+        f"date: {date}\n"
+        f"topic_name: {DEFAULT_TREND_TOPIC}\n"
+    )
+
+
+def _extract_release_metadata_topic(text: str) -> str:
+    try:
+        metadata = parse_trend_metadata_text(text)
+        return str(metadata.get("topic_name") or DEFAULT_TREND_TOPIC).strip()
+    except Exception:
+        match = re.search(r"(?m)^topic_name\s*:\s*['\"]?([^'\"\n#]+)", text or "")
+        return match.group(1).strip() if match else DEFAULT_TREND_TOPIC
+
+
+def _replace_release_metadata_topic(text: str, topic: str) -> str:
+    """Set every ``topic_name:`` line of *text* to *topic*, appending one if absent; a blank topic returns *text*."""
+    topic = str(topic or "").strip()
+    if not topic:
+        return text
+    line = f"topic_name: {topic}"
+    updated, hits = re.subn(r"(?m)^topic_name\s*:.*$", lambda _m: line, text or "")  # callable: topic stays literal
+    return updated if hits else ((text.rstrip() + "\n" + line + "\n") if text else line + "\n")
+
+
+def _format_run_mtime(mtime: float) -> str:
+    if not mtime:
+        return "—"
+    try:
+        return datetime.fromtimestamp(mtime, tz=_JST).strftime("%Y-%m-%d %H:%M JST")
+    except Exception:
+        return "—"
+
+
+def _build_overview_url(run_a: str, compare_runs: Optional[List[str]] = None) -> str:
+    query = {"mode": "single", "run_a": run_a}
+    valid_compare_runs = [str(name).strip() for name in (compare_runs or []) if str(name).strip()]
+    if valid_compare_runs:
+        query["mode"] = "compare"
+        for idx, run_name in enumerate(valid_compare_runs[:4]):
+            query[f"run_{chr(98 + idx)}"] = run_name
+    return f"/?{urllib.parse.urlencode(query)}"
+
+
+def _format_metadata_time(value: object) -> str:
+    if not value:
+        return "—"
+    if isinstance(value, datetime):
+        dt = value
+    else:
+        try:
+            dt = datetime.fromisoformat(str(value).replace("Z", "+00:00"))
+        except Exception:
+            return str(value)
+    if getattr(dt, "tzinfo", None) is None:
+        dt = dt.replace(tzinfo=timezone.utc)
+    try:
+        return dt.astimezone(_JST).strftime("%Y-%m-%d %H:%M JST")
+    except Exception:
+        return str(value)
+
+
+def _metadata_text(value: object) -> str:
+    text = str(value or "").strip()
+    return text or "—"
+
+
+def _run_user_label(subject_id: str, environment: str) -> str:
+    subject = str(subject_id or "").strip()
+    if not subject:
+        return "(Auto)"
+    if not subject.startswith("t4:"):
+        return subject
+    try:
+        profile = _resolve_subject_name(subject, environment or "default")
+        name = str(profile.get("name") or subject).strip()
+        return name or subject
+    except Exception:
+        return "(Auto)"
+
+
+def _catalog_url(project_id: str, catalog_id: str, metadata_url: str = "") -> str:
+    direct_url = str(metadata_url or "").strip()
+    if direct_url:
+        return direct_url
+    project = str(project_id or "").strip()
+    catalog = str(catalog_id or "").strip()
+    if project and catalog:
+        return f"https://evaluation.tier4.jp/evaluation/vehicle_catalogs/{catalog}?project_id={project}"
+    return ""
+
+
+@st.cache_data(ttl=24 * 3600, show_spinner=False)
+def _catalog_preset_name_map() -> Dict[str, str]:
+    presets, _, _ = _load_catalog_presets()
+    mapping: Dict[str, str] = {}
+    for item in presets:
+        if not isinstance(item, dict):
+            continue
+        catalog_id = str(item.get("catalog_id") or "").strip()
+        display_name = str(item.get("display_name") or item.get("name") or "").strip()
+        if catalog_id and display_name:
+            mapping[catalog_id] = display_name
+    return mapping
+
+
+def _catalog_label_for_run(catalog_id: str, catalog_name: str) -> str:
+    resolved_name = str(catalog_name or "").strip()
+    if resolved_name:
+        return resolved_name
+    catalog = str(catalog_id or "").strip()
+    if not catalog:
+        return "—"
+    preset_match = _catalog_preset_name_map().get(catalog, "").strip()
+    return preset_match or catalog
+
+
+@st.cache_data(ttl=15, show_spinner=False)
+def _load_local_runs() -> List[Dict[str, object]]:
+    runs: List[Dict[str, object]] = []
+    for run_path in list_run_directories():
+        info = get_run_info(run_path)
+        metadata = read_run_metadata(run_path)
+        task_meta = metadata.get("task") if isinstance(metadata.get("task"), dict) else {}
+        request_meta = metadata.get("request") if isinstance(metadata.get("request"), dict) else {}
+        evaluator_meta = metadata.get("evaluator") if isinstance(metadata.get("evaluator"), dict) else {}
+        description = str(
+            request_meta.get("description")
+            or evaluator_meta.get("description")
+            or ""
+        ).strip()
+        requested_by = str(
+            evaluator_meta.get("scheduled_by")
+            or task_meta.get("requested_by")
+            or ""
+        ).strip()
+        environment = str(request_meta.get("environment") or "default").strip() or "default"
+        requested_by_label = _run_user_label(requested_by, environment)
+        task_type = str(task_meta.get("type") or metadata.get("source_mode") or "").strip()
+        task_status = str(task_meta.get("status") or "").strip()
+        evaluator_job_id = str(
+            evaluator_meta.get("job_id")
+            or request_meta.get("job_id")
+            or ""
+        ).strip()
+        evaluator_report_url = str(evaluator_meta.get("report_url") or "").strip()
+        evaluator_title = str(
+            evaluator_meta.get("title")
+            or description
+            or evaluator_job_id
+            or ""
+        ).strip()
+        evaluator_target = str(
+            evaluator_meta.get("target")
+            or request_meta.get("target_name")
+            or ""
+        ).strip()
+        catalog_id = str(
+            evaluator_meta.get("catalog_id")
+            or request_meta.get("catalog_id")
+            or ""
+        ).strip()
+        catalog_name = str(evaluator_meta.get("catalog_name") or "").strip()
+        catalog_label = _catalog_label_for_run(catalog_id, catalog_name)
+        catalog_url = _catalog_url(
+            str(request_meta.get("project_id") or "").strip(),
+            catalog_id,
+            str(evaluator_meta.get("catalog_url") or "").strip(),
+        )
+        case_totals = evaluator_meta.get("case_totals") if isinstance(evaluator_meta.get("case_totals"), dict) else {}
+        passed_count = int(case_totals.get("success", 0) or 0)
+        failed_count = int(case_totals.get("failed", 0) or 0)
+        canceled_count = int(case_totals.get("canceled", 0) or 0)
+        search_blob = build_run_search_blob(
+            run_path,
+            metadata,
+            extra_values=[
+                description,
+                requested_by,
+                requested_by_label,
+                task_type,
+                task_status,
+                evaluator_job_id,
+                catalog_id,
+                catalog_name,
+                evaluator_target,
+            ],
+        )
+        runs.append(
+            {
+                "name": info["name"],
+                "run_path": run_path,
+                "path_display": f"{get_data_root_display()}/{info['name']}",
+                "size": format_size(info["size_bytes"]),
+                "mtime": float(info["mtime"] or 0),
+                "mtime_date": _to_jst(datetime.fromtimestamp(float(info["mtime"] or 0), tz=timezone.utc)).date() if info["mtime"] else None,
+                "modified": _format_run_mtime(info["mtime"]),
+                "has_summary": bool(info["has_summary"]),
+                "has_score": bool(info["has_score"]),
+                "has_parquet": bool(info["has_parquet"]),
+                "metadata": metadata,
+                "description": description,
+                "requested_by": requested_by,
+                "requested_by_label": requested_by_label,
+                "environment": environment,
+                "project_id": str(request_meta.get("project_id") or "").strip(),
+                "task_type": task_type,
+                "task_status": task_status,
+                "evaluator_job_id": evaluator_job_id,
+                "evaluator_report_url": evaluator_report_url,
+                "evaluator_title": evaluator_title,
+                "evaluator_target": evaluator_target,
+                "branch_label": evaluator_target,
+                "evaluator_git_sha": str(evaluator_meta.get("git_sha") or "").strip(),
+                "evaluator_git_ref_url": str(evaluator_meta.get("git_ref_url") or "").strip(),
+                "evaluator_git_commit_url": str(evaluator_meta.get("git_commit_url") or "").strip(),
+                "evaluator_source_url": str(evaluator_meta.get("source_url") or "").strip(),
+                "evaluator_source_repo_label": str(evaluator_meta.get("source_repo_label") or "").strip(),
+                "catalog_id": catalog_id,
+                "catalog_name": catalog_name,
+                "catalog_label": catalog_label,
+                "catalog_url": catalog_url,
+                "passed_count": passed_count,
+                "failed_count": failed_count,
+                "canceled_count": canceled_count,
+                "search_blob": search_blob,
+            }
+        )
+    runs.sort(key=lambda row: (-float(row["mtime"]), str(row["name"]).lower()))
+    return runs
+
+
+@st.cache_data(ttl=24 * 3600, show_spinner=False)
+def _resolve_subject_name(subject_id: str, environment: str) -> Dict[str, str]:
+    subject = str(subject_id or "").strip()
+    if not subject or not subject.startswith("t4:"):
+        return {"subject_id": subject, "name": subject, "email": ""}
+    org_id = os.environ.get(
+        "WEBAUTO_ORGANIZATION_ID",
+        "5a21621d-6968-4f7d-94f8-99cfb77b6e71",
+    ).strip()
+    if not org_id:
+        return {"subject_id": subject, "name": subject, "email": ""}
+    os.environ["AUTH_PROFILE"] = environment or "default"
+    from webautoauth.token import HttpService, TokenSource, load_config
+
+    config = load_config()
+    token_source = TokenSource(HttpService(config))
+    access_token = token_source.get_token().access_token
+    quoted_subject = urllib.parse.quote(subject, safe="")
+    url = f"https://auth.web.auto/v2/organizations/{org_id}/members/{quoted_subject}"
+    response = requests.get(
+        url,
+        headers={"Authorization": f"Bearer {access_token}", "accept": "application/json"},
+        timeout=10,
+    )
+    response.raise_for_status()
+    data = response.json()
+    return {
+        "subject_id": str(data.get("subject_id") or subject).strip(),
+        "name": str(data.get("name") or subject).strip(),
+        "email": str(data.get("email") or "").strip(),
+    }
+
+
+def _inject_workflow_page_styles() -> None:
+    st.markdown(
+        """
+        <style>
+        .wf-toolbar-note {
+            margin: 0 0 0.28rem 0;
+            font-size: 0.66rem;
+            letter-spacing: 0.12em;
+            text-transform: uppercase;
+            color: #64748b;
+            font-weight: 700;
+        }
+        .wf-panel {
+            border: 1px solid rgba(148, 163, 184, 0.24);
+            background: linear-gradient(180deg, rgba(255,255,255,0.98) 0%, rgba(248,250,252,0.98) 100%);
+            border-radius: 18px;
+            padding: 1rem 1rem 0.85rem 1rem;
+            box-shadow: 0 18px 50px -28px rgba(15, 23, 42, 0.22);
+            margin-bottom: 1rem;
+        }
+        .wf-panel-title {
+            margin: 0;
+            font-size: 1rem;
+            font-weight: 800;
+            color: #0f172a;
+            letter-spacing: -0.02em;
+        }
+        .wf-panel-copy {
+            margin: 0.3rem 0 0 0;
+            color: #475569;
+            font-size: 0.9rem;
+            line-height: 1.5;
+        }
+        .wf-filter-strip,
+        .wf-pager-strip {
+            border: none;
+            background: linear-gradient(180deg, rgba(248,250,252,0.72) 0%, rgba(248,250,252,0.28) 100%);
+            border-radius: 14px;
+            padding: 0.72rem 0.78rem 0.28rem 0.78rem;
+            box-shadow: none;
+            margin-bottom: 0.32rem;
+        }
+        .wf-filter-strip {
+            margin-top: 0.12rem;
+        }
+        .wf-pager-strip {
+            padding-top: 0.28rem;
+            padding-bottom: 0.28rem;
+        }
+        .wf-pager-summary {
+            padding-top: 0.2rem;
+            color: #475569;
+            font-size: 0.82rem;
+            line-height: 1.35;
+        }
+        .wf-pager-summary strong {
+            color: #0f172a;
+            font-weight: 700;
+        }
+        .wf-meta-inline {
+            margin-top: 0.2rem;
+            color: #64748b;
+            font-size: 0.78rem;
+        }
+        .wf-meta-inline a {
+            color: inherit;
+            text-decoration: none;
+        }
+        .wf-meta-inline a:hover {
+            text-decoration: underline;
+        }
+        .wf-linked-ref {
+            margin-top: 0.35rem;
+            color: #475569;
+            font-size: 0.82rem;
+            line-height: 1.35;
+        }
+        .wf-linked-ref a {
+            color: #0f766e;
+            text-decoration: none;
+            font-weight: 600;
+        }
+        .wf-linked-ref a:hover {
+            text-decoration: underline;
+        }
+        .wf-run-list {
+            display: block;
+            margin-top: 0.35rem;
+        }
+        .wf-run-name {
+            min-width: 0;
+        }
+        .wf-run-title {
+            font-size: 0.8rem;
+            line-height: 1.2;
+            font-weight: 700;
+            color: #0f172a;
+            white-space: nowrap;
+            overflow: hidden;
+            text-overflow: ellipsis;
+        }
+        .wf-run-title a {
+            color: inherit;
+            text-decoration: none;
+        }
+        .wf-run-title a:hover {
+            text-decoration: underline;
+        }
+        .wf-run-title--muted,
+        .wf-run-title--muted a {
+            color: #94a3b8 !important;
+        }
+        .wf-run-cell {
+            min-width: 0;
+            color: #0f172a;
+            font-size: 0.78rem;
+            line-height: 1.15;
+            white-space: nowrap;
+            overflow: hidden;
+            text-overflow: ellipsis;
+        }
+        .wf-run-cell--muted {
+            color: #94a3b8;
+        }
+        .wf-run-text {
+            padding-top: 0.26rem;
+        }
+        .wf-meta-inline--muted {
+            color: #94a3b8;
+        }
+        .wf-run-code {
+            padding-top: 0.22rem;
+            font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace;
+            font-size: 0.74rem;
+            line-height: 1.22;
+            color: #0f172a;
+            white-space: normal;
+            overflow-wrap: anywhere;
+            word-break: break-all;
+        }
+        .wf-run-flags {
+            display: flex;
+            flex-wrap: nowrap;
+            gap: 0.24rem;
+            padding-top: 0.18rem;
+            overflow: hidden;
+        }
+        .wf-flag {
+            display: inline-flex;
+            align-items: center;
+            padding: 0.12rem 0.38rem;
+            border-radius: 999px;
+            font-size: 0.68rem;
+            font-weight: 700;
+            letter-spacing: 0.02em;
+            background: #e2e8f0;
+            color: #475569;
+            white-space: nowrap;
+        }
+        .wf-flag--ok {
+            background: #dcfce7;
+            color: #166534;
+        }
+        .wf-run-flags--muted {
+            opacity: 0.58;
+        }
+        .wf-unavailable-note {
+            margin-top: 0.18rem;
+            font-size: 0.68rem;
+            color: #94a3b8;
+            letter-spacing: 0.01em;
+        }
+        .wf-compare-bar {
+            border: none;
+            background: linear-gradient(135deg, rgba(248,250,252,0.65) 0%, rgba(236,254,255,0.55) 100%);
+            border-radius: 12px;
+            padding: 0.62rem 0.78rem;
+            margin: 0.18rem 0 0.4rem 0;
+        }
+        .wf-compare-title {
+            margin: 0;
+            font-size: 0.8rem;
+            font-weight: 800;
+            color: #0f172a;
+            letter-spacing: 0.01em;
+        }
+        [class*="st-key-workflow_compare_pick__"] label[data-testid="stWidgetLabel"] {
+            display: none;
+        }
+        [class*="st-key-workflow_compare_pick__"] div[data-testid="stCheckbox"] {
+            display: flex;
+            justify-content: center;
+            padding-top: 0.1rem;
+        }
+        [class*="st-key-workflow_compare_pick__"] input[type="checkbox"] {
+            transform: scale(1.2);
+        }
+        [class*="st-key-workflow_runs_page_select"] div[data-baseweb="select"] {
+            min-height: 1.72rem;
+        }
+        [class*="st-key-workflow_runs_page_select"] [data-baseweb="select"] > div {
+            min-height: 1.72rem;
+            font-size: 0.8rem;
+        }
+        [class*="st-key-workflow_runs_page_prev"] button,
+        [class*="st-key-workflow_runs_page_next"] button {
+            min-height: 1.72rem;
+            height: 1.72rem;
+            padding: 0 0.3rem;
+            font-size: 0.8rem;
+            line-height: 1;
+        }
+        [class*="st-key-workflow_run_details__"] button,
+        [class*="st-key-workflow_run_download__"] button,
+        [class*="st-key-workflow_run_delete__"] button,
+        [class*="st-key-workflow_local_run_retest__"] button {
+            white-space: nowrap;
+            min-height: 2.2rem;
+            font-size: 0.72rem;
+            padding-left: 0.35rem;
+            padding-right: 0.35rem;
+            letter-spacing: 0.01em;
+        }
+        .wf-launcher {
+            border: 1px solid rgba(20, 184, 166, 0.22);
+            background: linear-gradient(135deg, #f0fdfa 0%, #ffffff 100%);
+            border-radius: 14px;
+            padding: 0.85rem 1rem;
+            margin-bottom: 0.8rem;
+        }
+        .wf-launcher-title {
+            margin: 0;
+            font-size: 0.95rem;
+            font-weight: 800;
+            color: #0f172a;
+        }
+        .wf-launcher-copy {
+            margin: 0.25rem 0 0 0;
+            font-size: 0.84rem;
+            color: #475569;
+        }
+        .wf-launcher-meta {
+            margin-top: 0.55rem;
+            font-size: 0.78rem;
+            color: #475569;
+        }
+        .wf-empty {
+            border: 1px dashed rgba(148, 163, 184, 0.45);
+            border-radius: 12px;
+            background: rgba(248, 250, 252, 0.8);
+            padding: 0.8rem 0.9rem;
+            color: #475569;
+            font-size: 0.84rem;
+        }
+        </style>
+        """,
+        unsafe_allow_html=True,
+    )
+
+
+def _build_local_run_artifact_list(run_name: str) -> tuple[Optional[Path], list[tuple[Path, str]], str]:
+    """Collect the downloadable artifacts of a local run.
+
+    Args:
+        run_name: Name of the run, resolved through ``resolve_run_subdirectory``.
+
+    Returns:
+        A ``(run_path, artifacts, error)`` triple. ``artifacts`` holds
+        ``(file_path, archive_name)`` pairs: ``Summary.csv`` and ``Score.csv``
+        when they exist as files, followed by the top-level ``*.parquet`` files
+        sorted case-insensitively by name. When the run cannot be resolved,
+        ``run_path`` is ``None``, ``artifacts`` is empty and ``error`` carries
+        the message.
+    """
+    run_path, err = resolve_run_subdirectory(run_name)
+    if err:
+        return None, [], err
+    if run_path is None:
+        # Explicit guard rather than ``assert``: asserts vanish under ``-O`` and
+        # the path arithmetic below would then fail with an opaque TypeError.
+        return None, [], "Run path could not be resolved."
+
+    artifacts: list[tuple[Path, str]] = [
+        (run_path / fixed_name, fixed_name)
+        for fixed_name in ("Summary.csv", "Score.csv")
+        if (run_path / fixed_name).is_file()
+    ]
+    # Only regular files: a directory that happens to end in ".parquet" would
+    # otherwise be written to the ZIP as an empty directory entry.
+    parquet_files = sorted(
+        (entry for entry in run_path.glob("*.parquet") if entry.is_file()),
+        key=lambda entry: entry.name.lower(),
+    )
+    artifacts.extend((entry, entry.name) for entry in parquet_files)
+    return run_path, artifacts, ""
+
+
+def _render_local_run_download_dialog(run_name: str) -> None:
+    """Render the dialog body that packages a run's artifacts into a ZIP.
+
+    The archive is built in memory when "Prepare ZIP" is pressed and stored in
+    ``st.session_state`` under a per-run key, so the download button can be
+    rendered from the stored payload on later script runs.
+    """
+    run_dir, artifacts, resolve_error = _build_local_run_artifact_list(run_name)
+    if resolve_error:
+        st.error(resolve_error)
+        return
+    if run_dir is None:
+        st.error("Run path could not be resolved.")
+        return
+
+    state_key = f"workflow_zip_prepared::{run_name}"
+    st.caption("Download the generated local artifacts for this run as one ZIP.")
+    if not artifacts:
+        st.info("This run has no Summary.csv, Score.csv, or top-level `.parquet` files.")
+        return
+
+    archive_names = ", ".join(archive_name for _, archive_name in artifacts)
+    st.caption(f"**{len(artifacts)}** file(s): {archive_names}")
+    payload = st.session_state.get(state_key)
+
+    if st.button("Prepare ZIP", key=f"workflow_prepare_zip::{run_name}", use_container_width=True):
+        archive = io.BytesIO()
+        failures: list[str] = []
+        added: list[str] = []
+        with st.spinner("Building ZIP…"), zipfile.ZipFile(archive, "w", zipfile.ZIP_DEFLATED) as bundle:
+            for source_file, archive_name in artifacts:
+                try:
+                    bundle.write(source_file, arcname=archive_name)
+                except OSError as exc:
+                    # Keep going: one unreadable file should not sink the whole archive.
+                    failures.append(f"{archive_name}: {exc}")
+                else:
+                    added.append(archive_name)
+        for failure in failures:
+            st.warning(failure)
+        if not added:
+            st.session_state.pop(state_key, None)
+            payload = None
+            st.error("Could not add any files to the ZIP.")
+        else:
+            # Replace characters that are invalid in file names before building the stem.
+            file_stem = re.sub(r'[<>:"/\\|?*\x00-\x1f]', "_", run_name).strip() or "run"
+            st.session_state[state_key] = {
+                "data": archive.getvalue(),
+                "file_name": f"{file_stem}_artifacts.zip",
+            }
+            payload = st.session_state.get(state_key)
+
+    if not (payload and payload.get("data")):
+        return
+    st.download_button(
+        label=f"Download {payload['file_name']}",
+        data=payload["data"],
+        file_name=payload["file_name"],
+        mime="application/zip",
+        key=f"workflow_dl_zip::{run_name}",
+        use_container_width=True,
+    )
+
+
+def _render_local_run_delete_dialog(run_name: str) -> None:
+    """Render the confirmation dialog body that deletes a local run.
+
+    The user must type the run name before ``delete_run`` is called. On
+    success the per-run dialog/ZIP entries are dropped from
+    ``st.session_state``, the ``_load_local_runs`` cache is cleared and a rerun
+    is requested; on failure the message returned by ``delete_run`` is shown.
+    """
+    st.warning("This deletes the local run directory permanently.")
+    confirm = st.text_input(
+        "Type the run name to confirm",
+        value="",
+        placeholder=run_name,
+        key=f"workflow_delete_confirm::{run_name}",
+    ).strip()
+    if not st.button("Delete run", key=f"workflow_delete_btn::{run_name}", type="primary", use_container_width=True):
+        return
+
+    # The typed text is stripped, so compare against the stripped name as well;
+    # otherwise a name with surrounding whitespace could never be confirmed.
+    if confirm != run_name.strip():
+        st.error("Confirmation text does not match the run name.")
+        return
+
+    ok, msg = delete_run(run_name)
+    if not ok:
+        # Handled explicitly so the failure message never depends on
+        # ``st.rerun()`` interrupting the script on the success path.
+        st.error(msg)
+        return
+
+    for state_key in (
+        "workflow_local_run_detail",
+        "workflow_local_run_download",
+        "workflow_local_run_delete",
+        f"workflow_zip_prepared::{run_name}",
+    ):
+        st.session_state.pop(state_key, None)
+    st.success(msg)
+    _load_local_runs.clear()
+    st.rerun()
+
+
+def _render_local_runs_header() -> None:
+    """Render the column-title row above the local runs table.
+
+    Uses the same ten column widths as ``_render_local_run_row`` so the titles
+    line up with the row cells.
+    """
+    column_titles = (
+        "Pick",
+        "Name",
+        "User",
+        "Catalog",
+        "Evaluator",
+        "Result",
+        "Updated",
+        "Files",
+        "Size",
+        "Actions",
+    )
+    columns = st.columns([0.45, 2.35, 0.72, 1.45, 1.55, 1.0, 1.0, 1.22, 0.68, 1.55], gap="small")
+    for column, title in zip(columns, column_titles):
+        column.markdown(f'<div class="wf-toolbar-note">{title}</div>', unsafe_allow_html=True)
+
+
+def _run_needs_source_backfill(run: Dict[str, object]) -> bool:
+    """Tell whether a run is eligible for, and still missing, source metadata.
+
+    A run qualifies when it has a non-blank evaluator job id and project id and
+    at least one of the stored git ref URL, git commit URL, source URL or git
+    SHA is blank.
+    """
+
+    def _filled(field: str) -> bool:
+        # Missing keys, ``None`` and whitespace-only values all count as blank.
+        return bool(str(run.get(field) or "").strip())
+
+    if not _filled("evaluator_job_id") or not _filled("project_id"):
+        return False
+    source_fields = (
+        "evaluator_git_ref_url",
+        "evaluator_git_commit_url",
+        "evaluator_source_url",
+        "evaluator_git_sha",
+    )
+    return not all(_filled(field) for field in source_fields)
+
+
+def _backfill_local_run_source_metadata(runs: List[Dict[str, object]]) -> Dict[str, int]:
+    """Fill in missing evaluator source metadata for local runs (best effort).
+
+    For every run flagged by ``_run_needs_source_backfill`` the evaluator job
+    detail is fetched and merged into the run's stored ``evaluator`` metadata
+    via ``upsert_run_metadata``. Values from the fetched detail win; values
+    already present on the run are the fallback.
+
+    Args:
+        runs: Run records; each needs ``run_path`` (a ``Path``) plus the
+            ``project_id``/``environment``/``evaluator_job_id`` fields.
+
+    Returns:
+        Counters ``{"updated": n, "skipped": n, "failed": n}``. A single
+        failing run never aborts the loop; it is counted and logged.
+    """
+    import logging  # local import: keeps this block independent of the file's import list
+
+    log = logging.getLogger(__name__)
+    counts = {"updated": 0, "skipped": 0, "failed": 0}
+
+    def _text(value: object) -> str:
+        return str(value or "").strip()
+
+    for run in runs:
+        if not _run_needs_source_backfill(run):
+            counts["skipped"] += 1
+            continue
+        run_path = run.get("run_path")
+        if not isinstance(run_path, Path):
+            counts["failed"] += 1
+            continue
+        project_id = _text(run.get("project_id"))
+        environment = _text(run.get("environment") or "default") or "default"
+        evaluator_job_id = _text(run.get("evaluator_job_id"))
+        try:
+            detail = _fetch_evaluator_job_detail(project_id, environment, evaluator_job_id)
+        except Exception:
+            # Deliberately broad: the backfill must survive any per-run lookup error.
+            log.warning("Backfill: fetching evaluator job %s failed", evaluator_job_id, exc_info=True)
+            counts["failed"] += 1
+            continue
+        if not isinstance(detail, dict):
+            # Without this guard the ``detail.get`` calls below would raise
+            # outside any ``try`` and abort the whole backfill.
+            log.warning("Backfill: evaluator job %s returned no usable detail", evaluator_job_id)
+            counts["failed"] += 1
+            continue
+
+        patch = {
+            "evaluator": {
+                "target": _text(detail.get("source_label") or run.get("evaluator_target")),
+                "git_sha": _text(detail.get("git_sha") or run.get("evaluator_git_sha")),
+                "git_ref_url": _text(detail.get("git_ref_url") or run.get("evaluator_git_ref_url")),
+                "git_commit_url": _text(detail.get("git_commit_url") or run.get("evaluator_git_commit_url")),
+                "source_url": _text(detail.get("source_url") or run.get("evaluator_source_url")),
+                "source_repo_label": _text(detail.get("source_repo_label") or run.get("evaluator_source_repo_label")),
+                "catalog_name": _text(detail.get("catalog") or run.get("catalog_name")),
+                "catalog_url": _text(detail.get("catalog_url") or run.get("catalog_url")),
+            }
+        }
+        try:
+            upsert_run_metadata(run_path, patch, create_missing=False)
+        except Exception:
+            log.warning("Backfill: writing metadata for %s failed", run_path, exc_info=True)
+            counts["failed"] += 1
+        else:
+            counts["updated"] += 1
+    return counts
+
+
+def _render_local_run_row(run: Dict[str, object], *, selected: bool) -> bool:
+    """Render one row of the local runs table.
+
+    Args:
+        run: Run record. ``name``, ``modified``, ``size``, ``has_summary``,
+            ``has_score`` and ``has_parquet`` are read with ``[]`` and must be
+            present; every other field is optional.
+        selected: Initial state of the compare checkbox. It is only applied
+            when the checkbox has no entry in ``st.session_state`` yet.
+
+    Returns:
+        Whether the row's compare checkbox is currently ticked. Rows without
+        any Summary/Score/Parquet output are rendered muted, and their checkbox
+        is disabled and forced to ``False``.
+    """
+    name_raw = str(run["name"])
+    name = html.escape(name_raw)
+    modified = html.escape(str(run["modified"]))
+    user_label = html.escape(str(run.get("requested_by_label") or "—"))
+    catalog_label = html.escape(str(run.get("catalog_label") or run.get("catalog_id") or "—"))
+    catalog_url = html.escape(str(run.get("catalog_url") or ""))
+    evaluator_job_id = str(run.get("evaluator_job_id") or "").strip()
+    evaluator_report_url = str(run.get("evaluator_report_url") or "").strip()
+    evaluator_target = str(run.get("evaluator_target") or "").strip()
+    description = str(run.get("description") or "").strip()
+    evaluator_title = html.escape(str(run.get("evaluator_title") or description or evaluator_job_id or "—"))
+    source_label = evaluator_target or "—"
+    source_url = str(run.get("evaluator_git_ref_url") or run.get("evaluator_source_url") or "").strip()
+    source_git_sha = str(run.get("evaluator_git_sha") or "").strip()
+    source_commit_url = str(run.get("evaluator_git_commit_url") or "").strip()
+    result_label = html.escape(
+        f"✅ {int(run.get('passed_count') or 0)}  ❌ {int(run.get('failed_count') or 0)}  ⏹ {int(run.get('canceled_count') or 0)}"
+    )
+    task_type = str(run.get("task_type") or "").strip()
+    task_status = str(run.get("task_status") or "").strip()
+    meta_bits = [bit for bit in (task_type, task_status) if bit]
+    flags = [
+        ("Summary", bool(run["has_summary"])),
+        ("Score", bool(run["has_score"])),
+        ("Parquet", bool(run["has_parquet"])),
+    ]
+    compare_available = any(enabled for _, enabled in flags)
+    title_class = "wf-run-title wf-run-text" + ("" if compare_available else " wf-run-title--muted")
+    cell_class = "wf-run-cell wf-run-text" + ("" if compare_available else " wf-run-cell--muted")
+    meta_class = "wf-meta-inline" + ("" if compare_available else " wf-meta-inline--muted")
+    flag_wrap_class = "wf-run-flags" + ("" if compare_available else " wf-run-flags--muted")
+    flag_html = "".join(
+        f'<span class="wf-flag {"wf-flag--ok" if enabled else ""}">{label}</span>'
+        for label, enabled in flags
+    )
+    if not compare_available:
+        flag_html += '<div class="wf-unavailable-note">Unavailable for compare</div>'
+    size_label = html.escape(str(run["size"]))
+
+    # Widget state must be seeded before the checkbox is instantiated.
+    checkbox_key = f"workflow_compare_pick::{name_raw}"
+    if not compare_available:
+        st.session_state[checkbox_key] = False
+    elif checkbox_key not in st.session_state:
+        st.session_state[checkbox_key] = bool(selected)
+
+    row_cols = st.columns([0.45, 2.35, 0.72, 1.45, 1.55, 1.0, 1.0, 1.22, 0.68, 1.55], gap="small")
+    with row_cols[0]:
+        checked = st.checkbox(
+            "Select run",
+            key=checkbox_key,
+            label_visibility="collapsed",
+            disabled=not compare_available,
+        )
+    with row_cols[1]:
+        # Escape the URL like every other href in this row so a quote or
+        # ampersand in it cannot break out of the attribute.
+        overview_href = html.escape(str(_build_overview_url(name_raw)))
+        title_html = f'<div class="{title_class}"><a href="{overview_href}" target="_self">{name}</a></div>'
+        if meta_bits:
+            meta_html = html.escape(" · ".join(meta_bits))
+            title_html += f'<div class="{meta_class}">{meta_html}</div>'
+        st.markdown(title_html, unsafe_allow_html=True)
+    with row_cols[2]:
+        st.markdown(f'<div class="{cell_class}">{user_label}</div>', unsafe_allow_html=True)
+    with row_cols[3]:
+        if catalog_url and catalog_label != "—":
+            st.markdown(
+                f'<div class="{title_class}"><a href="{catalog_url}" target="_blank">{catalog_label}</a></div>',
+                unsafe_allow_html=True,
+            )
+        else:
+            st.markdown(f'<div class="{cell_class}">{catalog_label}</div>', unsafe_allow_html=True)
+    with row_cols[4]:
+        if evaluator_report_url and evaluator_job_id:
+            evaluator_html = f'<div class="{title_class}"><a href="{html.escape(evaluator_report_url)}" target="_blank">{evaluator_title}</a></div>'
+        else:
+            evaluator_html = f'<div class="{cell_class}">{evaluator_title}</div>'
+        source_ref_html = _format_source_ref_html(source_label, source_url, source_git_sha, source_commit_url)
+        if source_ref_html and source_ref_html != "—":
+            evaluator_html += f'<div class="{meta_class}">{source_ref_html}</div>'
+        st.markdown(evaluator_html, unsafe_allow_html=True)
+    with row_cols[5]:
+        st.markdown(f'<div class="{cell_class}">{result_label}</div>', unsafe_allow_html=True)
+    with row_cols[6]:
+        st.markdown(f'<div class="{cell_class}">{modified}</div>', unsafe_allow_html=True)
+    with row_cols[7]:
+        st.markdown(f'<div class="wf-run-cell"><div class="{flag_wrap_class}">{flag_html}</div></div>', unsafe_allow_html=True)
+    with row_cols[8]:
+        st.markdown(f'<div class="{cell_class}">{size_label}</div>', unsafe_allow_html=True)
+    with row_cols[9]:
+        # Each action button only records the chosen run in session state.
+        action_cols = st.columns([0.78, 0.82, 0.82], gap="small")
+        with action_cols[0]:
+            if st.button("ℹ", key=f"workflow_run_details::{name_raw}", use_container_width=True, help="Show run details"):
+                st.session_state["workflow_local_run_detail"] = name_raw
+        with action_cols[1]:
+            if st.button("⬇", key=f"workflow_run_download::{name_raw}", use_container_width=True, help="Prepare ZIP download"):
+                st.session_state["workflow_local_run_download"] = name_raw
+        with action_cols[2]:
+            if st.button("🗑", key=f"workflow_run_delete::{name_raw}", use_container_width=True, help="Delete this local run"):
+                st.session_state["workflow_local_run_delete"] = name_raw
+    return bool(checked)
+
+
+def _render_local_run_details(run: Dict[str, object]) -> None:
+    """Render the bordered detail panel for one local run.
+
+    Shows filesystem facts from ``run`` plus whatever sections exist in its
+    stored ``metadata`` dict (task, request, evaluator, download, evaluation,
+    parquet). When a project id and evaluator job id are known, the evaluator
+    job detail is fetched on a best-effort basis and used only as a fallback
+    for source/catalog links. If this run is the one recorded under
+    ``workflow_local_run_retest`` in session state, the artifact-retest dialog
+    (or an inline fallback when ``st.dialog`` is unavailable) is rendered too.
+
+    Args:
+        run: Run record; ``name`` is required, all other fields are optional.
+    """
+    run_name = str(run["name"])
+    raw_metadata = run.get("metadata")
+    metadata = raw_metadata if isinstance(raw_metadata, dict) else {}
+
+    def _meta_section(section: str) -> dict:
+        # Any section that is missing or not a dict is treated as empty.
+        value = metadata.get(section)
+        return value if isinstance(value, dict) else {}
+
+    task_meta = _meta_section("task")
+    request_meta = _meta_section("request")
+    evaluator_meta = _meta_section("evaluator")
+    download_meta = _meta_section("download")
+    scenario_download_meta = _meta_section("scenario_download")
+    evaluation_meta = _meta_section("evaluation")
+    parquet_meta = _meta_section("parquet")
+
+    project_id = str(request_meta.get("project_id") or "").strip()
+    request_environment = str(request_meta.get("environment") or "default").strip() or "default"
+    evaluator_job_id = str(evaluator_meta.get("job_id") or request_meta.get("job_id") or "").strip()
+    evaluator_report_url = str(evaluator_meta.get("report_url") or "").strip()
+    evaluator_target = str(evaluator_meta.get("target") or evaluator_meta.get("target_name") or request_meta.get("target_name") or "").strip()
+    phase_default = str(request_meta.get("phase") or "perception.object_recognition.tracking.objects")
+
+    evaluator_detail: dict = {}
+    if project_id and evaluator_job_id:
+        try:
+            fetched_detail = _fetch_evaluator_job_detail(project_id, request_environment, evaluator_job_id)
+        except Exception:
+            # Best effort: the panel still renders from stored metadata alone.
+            fetched_detail = None
+        if isinstance(fetched_detail, dict):
+            evaluator_detail = fetched_detail
+
+    # NOTE(review): the live-detail fallback prefers source_url over
+    # git_ref_url, the reverse of the stored-metadata order — confirm intended.
+    source_url = str(
+        evaluator_meta.get("git_ref_url")
+        or evaluator_meta.get("source_url")
+        or evaluator_detail.get("source_url")
+        or evaluator_detail.get("git_ref_url")
+        or ""
+    ).strip()
+    source_commit_url = str(
+        evaluator_meta.get("git_commit_url")
+        or evaluator_detail.get("git_commit_url")
+        or ""
+    ).strip()
+    # Prefer the catalog URL stored in the run metadata (the backfill writes
+    # ``evaluator.catalog_url``) so the link survives a failed live lookup.
+    catalog_url = str(
+        evaluator_meta.get("catalog_url")
+        or evaluator_detail.get("catalog_url")
+        or ""
+    ).strip()
+    source_label = str(evaluator_meta.get("target") or evaluator_detail.get("source_label") or evaluator_target or "").strip()
+    source_git_sha = str(evaluator_meta.get("git_sha") or evaluator_detail.get("git_sha") or "").strip()
+    source_ref_text = _format_source_ref_text(source_label or evaluator_target, source_git_sha)
+    source_ref_html = _format_source_ref_html(source_label or evaluator_target, source_url, source_git_sha, source_commit_url)
+
+    with st.container(border=True):
+        title_cols = st.columns([3.4, 1.0])
+        with title_cols[0]:
+            st.markdown(f"### Local Run Details: `{run_name}`")
+        with title_cols[1]:
+            if st.button("Clear", key=f"workflow_clear_run_details::{run_name}", use_container_width=True):
+                st.session_state["workflow_local_run_detail"] = ""
+                st.rerun()
+
+        if not metadata:
+            st.info("This run was created before metadata capture was added. Showing only filesystem information.")
+
+        top_cols = st.columns(4)
+        top_cols[0].metric("Updated", _metadata_text(run.get("modified")))
+        top_cols[1].metric("Size", _metadata_text(run.get("size")))
+        top_cols[2].metric("Task type", _metadata_text(task_meta.get("type") or metadata.get("source_mode")))
+        top_cols[3].metric("Task status", _metadata_text(task_meta.get("status")))
+
+        run_cols = st.columns(2)
+        with run_cols[0]:
+            st.caption("Run folder")
+            st.code(str(run.get("path_display") or run.get("name") or ""), language=None)
+        with run_cols[1]:
+            st.caption("Available outputs")
+            available_outputs = [
+                label
+                for label, enabled in (
+                    ("Summary.csv", bool(run.get("has_summary"))),
+                    ("Score.csv", bool(run.get("has_score"))),
+                    ("Parquet", bool(run.get("has_parquet"))),
+                )
+                if enabled
+            ]
+            st.write(" | ".join(available_outputs) or "—")
+
+        # The evaluator's scheduler wins over the task's requester.
+        task_requested_by = str(task_meta.get("requested_by") or "").strip()
+        requested_by = str(evaluator_meta.get("scheduled_by") or task_requested_by or "").strip()
+        requested_by_label = _run_user_label(requested_by, request_environment)
+
+        task_cols = st.columns(4)
+        task_cols[0].text_input("Requested by", value=requested_by_label, disabled=True, key=f"run_detail_user::{run_name}")
+        task_cols[1].text_input("Task ID", value=_metadata_text(task_meta.get("id")), disabled=True, key=f"run_detail_tid::{run_name}")
+        task_cols[2].text_input("Created", value=_format_metadata_time(task_meta.get("created_at") or metadata.get("created_at")), disabled=True, key=f"run_detail_created::{run_name}")
+        task_cols[3].text_input("Updated", value=_format_metadata_time(task_meta.get("updated_at") or metadata.get("updated_at")), disabled=True, key=f"run_detail_updated::{run_name}")
+        task_error = str(task_meta.get("error_message") or "").strip()
+        if task_error:
+            st.error(task_error)
+
+        request_cols = st.columns(4)
+        request_cols[0].text_input("Project", value=_metadata_text(request_meta.get("project_id")), disabled=True, key=f"run_detail_project::{run_name}")
+        request_cols[1].text_input("Environment", value=_metadata_text(request_environment), disabled=True, key=f"run_detail_env::{run_name}")
+        request_cols[2].text_input("Catalog ID", value=_metadata_text(evaluator_meta.get("catalog_id") or request_meta.get("catalog_id")), disabled=True, key=f"run_detail_catalog::{run_name}")
+        request_cols[3].text_input("Integration ID", value=_metadata_text(evaluator_meta.get("integration_id") or request_meta.get("integration_id")), disabled=True, key=f"run_detail_integration::{run_name}")
+
+        detail_cols = st.columns(3)
+        detail_cols[0].text_input("Evaluator job ID", value=_metadata_text(evaluator_meta.get("job_id") or request_meta.get("job_id")), disabled=True, key=f"run_detail_job::{run_name}")
+        detail_cols[1].text_input("Source job ID", value=_metadata_text(evaluator_meta.get("source_job_id") or request_meta.get("source_job_id")), disabled=True, key=f"run_detail_source_job::{run_name}")
+        detail_cols[2].text_input("Target", value=_metadata_text(evaluator_meta.get("target") or request_meta.get("target_name")), disabled=True, key=f"run_detail_target::{run_name}")
+
+        st.text_input("Description", value=_metadata_text(request_meta.get("description") or evaluator_meta.get("description")), disabled=True, key=f"run_detail_desc::{run_name}")
+
+        if evaluator_job_id:
+            action_cols = st.columns([1.15, 1.15, 1.15, 2.55])
+            with action_cols[0]:
+                if evaluator_report_url:
+                    st.link_button("Open report", evaluator_report_url, use_container_width=True)
+            with action_cols[1]:
+                if source_url:
+                    st.link_button("Open source", source_url, use_container_width=True)
+            with action_cols[2]:
+                if catalog_url:
+                    st.link_button("Open catalog", catalog_url, use_container_width=True)
+            with action_cols[3]:
+                if st.button("Artifact retest", key=f"workflow_local_run_retest::{run_name}", type="primary", use_container_width=True):
+                    # Drop any suite selection left over from an earlier retest of this job.
+                    st.session_state.pop(f"recent_eval_retest_suite_selection_{evaluator_job_id}", None)
+                    st.session_state["workflow_local_run_retest"] = run_name
+                    st.rerun()
+
+            info_cols = st.columns([1.6, 2.4])
+            info_cols[0].text_input(
+                "Evaluator job",
+                value=evaluator_job_id,
+                disabled=True,
+                key=f"run_detail_job_full::{run_name}",
+            )
+            info_cols[1].text_input(
+                "Source ref",
+                value=_metadata_text(source_ref_text),
+                disabled=True,
+                key=f"run_detail_source_ref::{run_name}",
+            )
+            if source_ref_html and source_ref_html != "—":
+                st.markdown(
+                    f'<div class="wf-linked-ref">GitHub: {source_ref_html}</div>',
+                    unsafe_allow_html=True,
+                )
+
+        if evaluator_meta:
+            eval_cols = st.columns(4)
+            eval_cols[0].text_input("Evaluator status", value=_metadata_text(evaluator_meta.get("status")), disabled=True, key=f"run_detail_estatus::{run_name}")
+            eval_cols[1].text_input("Build status", value=_metadata_text(evaluator_meta.get("build_status")), disabled=True, key=f"run_detail_build::{run_name}")
+            eval_cols[2].text_input("Test status", value=_metadata_text(evaluator_meta.get("test_status")), disabled=True, key=f"run_detail_test::{run_name}")
+            eval_cols[3].text_input("Report URL", value=_metadata_text(evaluator_meta.get("report_url")), disabled=True, key=f"run_detail_report::{run_name}")
+            fail_message = str(evaluator_meta.get("fail_message") or "").strip()
+            if fail_message:
+                st.warning(fail_message)
+            case_totals = evaluator_meta.get("case_totals") if isinstance(evaluator_meta.get("case_totals"), dict) else {}
+            if case_totals:
+                case_cols = st.columns(4)
+                case_cols[0].metric("Cases total", str(case_totals.get("total", 0)))
+                case_cols[1].metric("Cases success", str(case_totals.get("success", 0)))
+                case_cols[2].metric("Cases failed", str(case_totals.get("failed", 0)))
+                case_cols[3].metric("Cases canceled", str(case_totals.get("canceled", 0)))
+
+        if download_meta or scenario_download_meta:
+            active_download_meta = download_meta or scenario_download_meta
+            download_cols = st.columns(4)
+            download_cols[0].text_input("Download mode", value=_metadata_text(active_download_meta.get("mode") or metadata.get("source_mode")), disabled=True, key=f"run_detail_dl_mode::{run_name}")
+            download_cols[1].text_input("Download type", value=_metadata_text(download_meta.get("download_type") or request_meta.get("download_type")), disabled=True, key=f"run_detail_dl_type::{run_name}")
+            download_cols[2].text_input("Phase", value=_metadata_text(download_meta.get("phase") or request_meta.get("phase")), disabled=True, key=f"run_detail_phase::{run_name}")
+            download_cols[3].text_input("Skip large files", value="Yes" if bool(download_meta.get("skip_large_file") or request_meta.get("skip_large_file")) else "No", disabled=True, key=f"run_detail_skip::{run_name}")
+
+            count_cols = st.columns(3)
+            count_cols[0].metric("Download total", str(active_download_meta.get("total", 0)))
+            count_cols[1].metric("Download success", str(active_download_meta.get("success", 0)))
+            count_cols[2].metric("Download failed", str(active_download_meta.get("failed", 0)))
+
+        if evaluation_meta:
+            eval_run_cols = st.columns(4)
+            eval_run_cols[0].text_input("Eval enabled", value="Yes" if bool(evaluation_meta.get("enabled") or request_meta.get("run_eval")) else "No", disabled=True, key=f"run_detail_eval_enabled::{run_name}")
+            eval_run_cols[1].text_input("Recursive", value="Yes" if bool(evaluation_meta.get("recursive") or request_meta.get("eval_recursive")) else "No", disabled=True, key=f"run_detail_eval_recursive::{run_name}")
+            eval_run_cols[2].text_input("Summary rows", value=str(evaluation_meta.get("summary_rows", "—")), disabled=True, key=f"run_detail_summary_rows::{run_name}")
+            eval_run_cols[3].text_input("Score rows", value=str(evaluation_meta.get("score_rows", "—")), disabled=True, key=f"run_detail_score_rows::{run_name}")
+
+        if parquet_meta:
+            st.text_input("Parquet path", value=_metadata_text(parquet_meta.get("path")), disabled=True, key=f"run_detail_parquet::{run_name}")
+
+        suites = evaluator_meta.get("suites") if isinstance(evaluator_meta.get("suites"), list) else []
+        failed_cases = evaluator_meta.get("failed_cases") if isinstance(evaluator_meta.get("failed_cases"), list) else []
+        if suites:
+            with st.expander("Evaluator suites", expanded=False):
+                st.dataframe(suites, width="stretch", hide_index=True)
+        if failed_cases:
+            with st.expander("Failed cases", expanded=False):
+                st.dataframe(failed_cases, width="stretch", hide_index=True)
+
+        with st.expander("Raw run metadata", expanded=False):
+            st.json(metadata or {})
+
+        selected_retest_run = str(st.session_state.get("workflow_local_run_retest") or "").strip()
+        if selected_retest_run == run_name and evaluator_job_id:
+            dialog_job = {
+                "job_id": evaluator_job_id,
+                "title": str(evaluator_detail.get("title") or run.get("description") or run["name"]),
+            }
+
+            def _render_retest_body() -> None:
+                # Shared by the modal dialog and the inline fallback below.
+                _render_recent_evaluator_job_retest_dialog(
+                    project_id,
+                    request_environment,
+                    dialog_job,
+                    output_path_default="",
+                    phase_default=phase_default,
+                )
+
+            if callable(getattr(st, "dialog", None)):
+                try:
+                    @st.dialog(f"Artifact retest · {dialog_job['title']}", width="large")
+                    def _workflow_local_run_retest_dialog() -> None:
+                        _render_retest_body()
+
+                    _workflow_local_run_retest_dialog()
+                finally:
+                    # Clear the request flag once the dialog has been opened for this run.
+                    if st.session_state.get("workflow_local_run_retest") == run_name:
+                        st.session_state.pop("workflow_local_run_retest", None)
+            else:
+                st.markdown("---")
+                fallback_cols = st.columns([4.2, 1.0])
+                with fallback_cols[0]:
+                    st.subheader(f"Artifact retest · {dialog_job['title']}")
+                with fallback_cols[1]:
+                    if st.button("Close", key=f"workflow_local_run_retest_close::{run_name}", use_container_width=True):
+                        st.session_state.pop("workflow_local_run_retest", None)
+                        st.rerun()
+                _render_retest_body()
+
+
+def _render_local_runs_section() -> None:
+    section_header("Local Runs", "")
+    runs = _load_local_runs()
+    if not runs:
+        st.markdown('<div class="wf-empty">No finished runs were found on this server yet.</div>', unsafe_allow_html=True)
+        return
+    missing_source_runs = sum(1 for run in runs if _run_needs_source_backfill(run))
+    local_runs_toolbar_cols = st.columns([4.2, 1.2])
+    with local_runs_toolbar_cols[0]:
+        if missing_source_runs:
+            st.caption(f"{missing_source_runs} run(s) are missing stored GitHub metadata.")
+    with local_runs_toolbar_cols[1]:
+        if missing_source_runs and st.button(
+            "Backfill GitHub",
+            key="workflow_backfill_local_run_source_meta",
+            use_container_width=True,
+        ):
+            with st.spinner("Backfilling missing GitHub metadata for local runs..."):
+                result = _backfill_local_run_source_metadata(runs)
+            _load_local_runs.clear()
+            if result["failed"]:
+                st.warning(
+                    f"Backfill updated {result['updated']} run(s), skipped {result['skipped']} run(s), failed on {result['failed']} run(s)."
+                )
+            else:
+                st.success(
+                    f"Backfill updated {result['updated']} run(s); {result['skipped']} run(s) already had metadata."
+                )
+            st.rerun()
+
+    current_user_id = str(get_task_list_current_user() or "").strip()
+    user_options = ["All users"]
+    if current_user_id:
+        user_options.append("My runs")
+    unique_users = []
+    seen_users = set()
+    user_option_subject_map = {"All users": "", "My runs": current_user_id, "(Auto)": "__auto__"}
+    for row in runs:
+        subject_id = str(row.get("requested_by") or "").strip()
+        label = str(row.get("requested_by_label") or "").strip()
+        option = label or "(Auto)"
+        if not subject_id:
+            if "(Auto)" not in user_options:
+                user_options.append("(Auto)")
+            continue
+        deduped_option = option
+        suffix = 2
+        while deduped_option in seen_users and user_option_subject_map.get(deduped_option) != subject_id:
+            deduped_option = f"{option} [{suffix}]"
+            suffix += 1
+        if deduped_option not in seen_users:
+            unique_users.append(deduped_option)
+            seen_users.add(deduped_option)
+            user_option_subject_map[deduped_option] = subject_id
+    user_options.extend(unique_users)
+
+    catalog_options = ["All catalogs"]
+    catalog_option_id_map = {"All catalogs": ""}
+    unique_catalogs = []
+    seen_catalogs = set()
+    for row in runs:
+        catalog_id = str(row.get("catalog_id") or "").strip()
+        catalog_label = str(row.get("catalog_label") or row.get("catalog_name") or catalog_id or "—").strip()
+        if not catalog_id:
+            continue
+        option = catalog_label or catalog_id
+        deduped_option = option
+        suffix = 2
+        while deduped_option in seen_catalogs and catalog_option_id_map.get(deduped_option) != catalog_id:
+            deduped_option = f"{option} [{suffix}]"
+            suffix += 1
+        if deduped_option not in seen_catalogs:
+            unique_catalogs.append(deduped_option)
+            seen_catalogs.add(deduped_option)
+            catalog_option_id_map[deduped_option] = catalog_id
+    catalog_options.extend(sorted(unique_catalogs, key=str.lower))
+
+    current_user_option = str(st.session_state.get("workflow_runs_user_filter", "All users"))
+    if current_user_option not in user_options:
+        current_user_option = "All users"
+        st.session_state["workflow_runs_user_filter"] = current_user_option
+    current_catalog_option = str(st.session_state.get("workflow_runs_catalog_filter", "All catalogs"))
+    if current_catalog_option not in catalog_options:
+        current_catalog_option = "All catalogs"
+        st.session_state["workflow_runs_catalog_filter"] = current_catalog_option
+    branch_options = ["All branches"]
+    unique_branches = sorted(
+        {
+            str(row.get("branch_label") or row.get("evaluator_target") or "").strip()
+            for row in runs
+            if str(row.get("branch_label") or row.get("evaluator_target") or "").strip()
+        },
+        key=str.lower,
+    )
+    branch_options.extend(unique_branches)
+    current_branch_option = str(st.session_state.get("workflow_runs_branch_filter", "All branches"))
+    if current_branch_option not in branch_options:
+        current_branch_option = "All branches"
+        st.session_state["workflow_runs_branch_filter"] = current_branch_option
+
+    st.markdown('<div class="wf-filter-strip">', unsafe_allow_html=True)
+    control_cols = st.columns([1.7, 1.15, 1.1, 0.95, 0.95])
+    with control_cols[0]:
+        st.markdown('<div class="wf-toolbar-note">Search</div>', unsafe_allow_html=True)
+        run_search_input = st.text_input(
+            "Search runs",
+            value=st.session_state.get("workflow_runs_search", ""),
+            key="workflow_runs_search",
+            label_visibility="collapsed",
+            placeholder="Filter by name, description, job id, catalog, user",
+        )
+    with control_cols[1]:
+        st.markdown('<div class="wf-toolbar-note">Catalog</div>', unsafe_allow_html=True)
+        catalog_filter_input = st.selectbox(
+            "Catalog",
+            options=catalog_options,
+            index=catalog_options.index(current_catalog_option),
+            key="workflow_runs_catalog_filter",
+            label_visibility="collapsed",
+        )
+    with control_cols[2]:
+        st.markdown('<div class="wf-toolbar-note">Branch</div>', unsafe_allow_html=True)
+        branch_filter_input = st.selectbox(
+            "Branch",
+            options=branch_options,
+            index=branch_options.index(current_branch_option),
+            key="workflow_runs_branch_filter",
+            label_visibility="collapsed",
+        )
+    with control_cols[3]:
+        st.markdown('<div class="wf-toolbar-note">User</div>', unsafe_allow_html=True)
+        user_filter_input = st.selectbox(
+            "User",
+            options=user_options,
+            index=user_options.index(current_user_option),
+            key="workflow_runs_user_filter",
+            label_visibility="collapsed",
+        )
+    with control_cols[4]:
+        st.markdown('<div class="wf-toolbar-note">Rows</div>', unsafe_allow_html=True)
+        page_size_input = int(
+            st.selectbox(
+                "Rows",
+                options=[10, 20, 50, 100],
+                index=[10, 20, 50, 100].index(int(st.session_state.get("workflow_runs_page_size", 10) or 10)),
+                key="workflow_runs_page_size",
+                label_visibility="collapsed",
+            )
+        )
+
+    second_control_cols = st.columns([0.92, 0.92, 0.6, 0.6, 2.4])
+    with second_control_cols[0]:
+        st.markdown('<div class="wf-toolbar-note">From</div>', unsafe_allow_html=True)
+        date_from_input = st.date_input(
+            "From",
+            value=st.session_state.get("workflow_runs_date_from", None),
+            key="workflow_runs_date_from",
+            label_visibility="collapsed",
+            help="Run modified-date lower bound in JST.",
+        )
+    with second_control_cols[1]:
+        st.markdown('<div class="wf-toolbar-note">To</div>', unsafe_allow_html=True)
+        date_to_input = st.date_input(
+            "To",
+            value=st.session_state.get("workflow_runs_date_to", None),
+            key="workflow_runs_date_to",
+            label_visibility="collapsed",
+            help="Run modified-date upper bound in JST.",
+        )
+    with second_control_cols[2]:
+        st.markdown('<div class="wf-toolbar-note">Summary</div>', unsafe_allow_html=True)
+        require_summary_input = st.toggle(
+            "Summary only",
+            value=bool(st.session_state.get("workflow_runs_summary_filter", False)),
+            key="workflow_runs_summary_filter",
+            label_visibility="collapsed",
+        )
+    with second_control_cols[3]:
+        st.markdown('<div class="wf-toolbar-note">Parquet</div>', unsafe_allow_html=True)
+        require_parquet_input = st.toggle(
+            "Parquet only",
+            value=bool(st.session_state.get("workflow_runs_parquet_filter", False)),
+            key="workflow_runs_parquet_filter",
+            label_visibility="collapsed",
+        )
+    with second_control_cols[4]:
+        st.markdown(
+            '<div class="wf-pager-summary">Pick a catalog, branch, or user directly, or narrow with text and dates.</div>',
+            unsafe_allow_html=True,
+        )
+    st.markdown('</div>', unsafe_allow_html=True)
+
+    current_filter_signature = (
+        str(run_search_input or ""),
+        str(catalog_filter_input or "All catalogs"),
+        str(branch_filter_input or "All branches"),
+        str(user_filter_input or "All users"),
+        date_from_input,
+        date_to_input,
+        bool(require_summary_input),
+        bool(require_parquet_input),
+        int(page_size_input),
+    )
+    previous_filter_signature = st.session_state.get("workflow_runs_filter_signature")
+    if previous_filter_signature is None:
+        st.session_state["workflow_runs_filter_signature"] = current_filter_signature
+    elif previous_filter_signature != current_filter_signature:
+        st.session_state["workflow_runs_filter_signature"] = current_filter_signature
+        st.session_state["workflow_runs_page"] = 1
+
+    run_search = str(run_search_input).strip().lower()
+    selected_catalog_filter = str(catalog_filter_input).strip()
+    selected_branch_filter = str(branch_filter_input).strip()
+    selected_user_filter = str(user_filter_input).strip()
+    selected_date_from = date_from_input
+    selected_date_to = date_to_input
+    require_summary = bool(require_summary_input)
+    require_parquet = bool(require_parquet_input)
+    page_size = int(page_size_input)
+
+    if selected_date_from and selected_date_to and selected_date_from > selected_date_to:
+        st.warning("`From` date must be earlier than or equal to `To` date.")
+        return
+
+    filtered = runs
+    if run_search:
+        filtered = [row for row in filtered if run_search in str(row.get("search_blob") or row["name"]).lower()]
+    if selected_catalog_filter not in ("", "All catalogs"):
+        selected_catalog_id = str(catalog_option_id_map.get(selected_catalog_filter) or "").strip()
+        filtered = [row for row in filtered if str(row.get("catalog_id") or "").strip() == selected_catalog_id]
+    if selected_branch_filter not in ("", "All branches"):
+        filtered = [
+            row for row in filtered
+            if str(row.get("branch_label") or row.get("evaluator_target") or "").strip() == selected_branch_filter
+        ]
+    if selected_user_filter == "My runs" and current_user_id:
+        filtered = [row for row in filtered if str(row.get("requested_by") or "").strip() == current_user_id]
+    elif selected_user_filter == "(Auto)":
+        filtered = [row for row in filtered if not str(row.get("requested_by") or "").strip()]
+    elif selected_user_filter not in ("", "All users", "My runs"):
+        selected_subject_id = str(user_option_subject_map.get(selected_user_filter) or "").strip()
+        filtered = [row for row in filtered if str(row.get("requested_by") or "").strip() == selected_subject_id]
+    if selected_date_from:
+        filtered = [row for row in filtered if row.get("mtime_date") and row["mtime_date"] >= selected_date_from]
+    if selected_date_to:
+        filtered = [row for row in filtered if row.get("mtime_date") and row["mtime_date"] <= selected_date_to]
+    if require_summary:
+        filtered = [row for row in filtered if bool(row["has_summary"])]
+    if require_parquet:
+        filtered = [row for row in filtered if bool(row["has_parquet"])]
+
+    compare_ready = [
+        str(row["name"])
+        for row in filtered
+        if bool(row["has_summary"]) or bool(row["has_score"]) or bool(row["has_parquet"])
+    ]
+    if "workflow_compare_runs" not in st.session_state:
+        st.session_state["workflow_compare_runs"] = compare_ready[:1]
+
+    compare_selected = [
+        name for name in st.session_state.get("workflow_compare_runs", [])
+        if name in compare_ready
+    ]
+    st.session_state["workflow_compare_runs"] = compare_selected
+
+    if not filtered:
+        st.markdown('<div class="wf-empty">No local runs matched the current filters.</div>', unsafe_allow_html=True)
+        return
+
+    page_key = "workflow_runs_page"
+    current_page = max(1, int(st.session_state.get(page_key, 1)))
+    page_count = max(1, (len(filtered) + page_size - 1) // page_size)
+    if current_page > page_count:
+        current_page = page_count
+        st.session_state[page_key] = current_page
+    start_idx = (current_page - 1) * page_size
+    visible_runs = filtered[start_idx:start_idx + page_size]
+    visible_names = {str(run["name"]) for run in visible_runs}
+
+    visible_end = min(len(filtered), start_idx + len(visible_runs))
+    st.markdown('<div class="wf-pager-strip">', unsafe_allow_html=True)
+    pager_cols = st.columns([0.65, 1.0, 0.65, 3.2])
+    with pager_cols[0]:
+        if st.button("‹", key="workflow_runs_page_prev", use_container_width=True, disabled=current_page <= 1):
+            current_page -= 1
+            st.session_state[page_key] = current_page
+            st.rerun()
+    with pager_cols[1]:
+        selected_page = st.selectbox(
+            "Page",
+            options=list(range(1, page_count + 1)),
+            index=max(0, current_page - 1),
+            label_visibility="collapsed",
+        )
+        if selected_page != current_page:
+            st.session_state[page_key] = int(selected_page)
+            current_page = int(selected_page)
+            start_idx = (current_page - 1) * page_size
+            visible_runs = filtered[start_idx:start_idx + page_size]
+            visible_names = {str(run["name"]) for run in visible_runs}
+    with pager_cols[2]:
+        if st.button("›", key="workflow_runs_page_next", use_container_width=True, disabled=current_page >= page_count):
+            current_page += 1
+            st.session_state[page_key] = current_page
+            st.rerun()
+    with pager_cols[3]:
+        st.markdown(
+            f'<div class="wf-pager-summary"><strong>{start_idx + 1}</strong>–<strong>{visible_end}</strong> of <strong>{len(filtered)}</strong> runs · {page_size} per page</div>',
+            unsafe_allow_html=True,
+        )
+    st.markdown('</div>', unsafe_allow_html=True)
+
+    _render_local_runs_header()
+    next_selected = [name for name in st.session_state.get("workflow_compare_runs", []) if name not in visible_names]
+    for run in visible_runs:
+        run_name = str(run["name"])
+        if _render_local_run_row(run, selected=run_name in st.session_state.get("workflow_compare_runs", [])) and run_name in compare_ready:
+            next_selected.append(run_name)
+    st.session_state["workflow_compare_runs"] = [name for name in compare_ready if name in next_selected]
+
+    st.markdown('<div class="wf-compare-bar">', unsafe_allow_html=True)
+    st.markdown('<p class="wf-compare-title">Compare</p>', unsafe_allow_html=True)
+    compare_cols = st.columns([3.4, 1.0])
+    with compare_cols[0]:
+        st.markdown('<div class="wf-toolbar-note">Selected runs</div>', unsafe_allow_html=True)
+        selected_runs = list(st.session_state.get("workflow_compare_runs", []))
+        if selected_runs:
+            st.caption(" | ".join(selected_runs))
+    with compare_cols[1]:
+        st.markdown('<div class="wf-toolbar-note">Action</div>', unsafe_allow_html=True)
+        if len(selected_runs) >= 2:
+            st.link_button("Compare", _build_overview_url(selected_runs[0], selected_runs[1:]), use_container_width=True)
+        elif len(selected_runs) == 1:
+            st.link_button("Open", _build_overview_url(selected_runs[0]), use_container_width=True)
+        else:
+            st.button("Open", disabled=True, use_container_width=True, key="workflow_compare_run_disabled")
+    st.markdown("</div>", unsafe_allow_html=True)
+
+    download_run_name = str(st.session_state.get("workflow_local_run_download") or "").strip()
+    if download_run_name:
+        if callable(getattr(st, "dialog", None)):
+            @st.dialog(f"Download artifacts · {download_run_name}", width="large")
+            def _workflow_local_run_download_dialog() -> None:
+                _render_local_run_download_dialog(download_run_name)
+                if st.button("Close", key=f"workflow_local_run_download_close::{download_run_name}", use_container_width=True):
+                    st.session_state.pop("workflow_local_run_download", None)
+                    st.rerun()
+
+            _workflow_local_run_download_dialog()
+        else:
+            st.markdown("---")
+            st.subheader(f"Download artifacts · {download_run_name}")
+            _render_local_run_download_dialog(download_run_name)
+
+    delete_run_name = str(st.session_state.get("workflow_local_run_delete") or "").strip()
+    if delete_run_name:
+        if callable(getattr(st, "dialog", None)):
+            @st.dialog(f"Delete local run · {delete_run_name}", width="large")
+            def _workflow_local_run_delete_dialog() -> None:
+                _render_local_run_delete_dialog(delete_run_name)
+                if st.button("Cancel", key=f"workflow_local_run_delete_close::{delete_run_name}", use_container_width=True):
+                    st.session_state.pop("workflow_local_run_delete", None)
+                    st.rerun()
+
+            _workflow_local_run_delete_dialog()
+        else:
+            st.markdown("---")
+            st.subheader(f"Delete local run · {delete_run_name}")
+            _render_local_run_delete_dialog(delete_run_name)
+
+    detail_run_name = str(st.session_state.get("workflow_local_run_detail") or "").strip()
+    if detail_run_name:
+        detail_run = next((row for row in runs if str(row["name"]) == detail_run_name), None)
+        if detail_run is not None:
+            _render_local_run_details(detail_run)
+
+
+def _render_current_tasks_section() -> None:
+    section_header("Current Tasks", "")
+    if not is_task_queue_enabled():
+        st.info("Task queue not enabled. Set `USE_TASK_QUEUE=true` to track background tasks.")
+        return
+
+    current_user = get_task_list_current_user()
+    if "workflow_task_history_range" not in st.session_state:
+        st.session_state["workflow_task_history_range"] = "7 days"
+    if "workflow_task_history_page_size" not in st.session_state:
+        st.session_state["workflow_task_history_page_size"] = 20
+    if "workflow_task_history_page" not in st.session_state:
+        st.session_state["workflow_task_history_page"] = 1
+
+    control_cols = st.columns([1.3, 1.0, 1.0, 2.7])
+    with control_cols[0]:
+        selected_range = st.selectbox(
+            "History range",
+            options=list(_TASK_HISTORY_RANGE_OPTIONS.keys()),
+            key="workflow_task_history_range",
+        )
+    with control_cols[1]:
+        page_size = int(
+            st.selectbox(
+                "Rows",
+                options=[20, 50, 100],
+                key="workflow_task_history_page_size",
+            )
+        )
+    since_days = _TASK_HISTORY_RANGE_OPTIONS.get(selected_range, _TASK_LIST_SINCE_DAYS)
+    total_tasks = count_recent_tasks(session_id=current_user, since_days=since_days)
+    page_count = max(1, (total_tasks + page_size - 1) // page_size) if total_tasks else 1
+    current_page = min(max(1, int(st.session_state.get("workflow_task_history_page", 1))), page_count)
+    st.session_state["workflow_task_history_page"] = current_page
+    with control_cols[2]:
+        selected_page = st.selectbox(
+            "Page",
+            options=list(range(1, page_count + 1)),
+            index=current_page - 1,
+            key="workflow_task_history_page_select",
+        )
+        if int(selected_page) != current_page:
+            current_page = int(selected_page)
+            st.session_state["workflow_task_history_page"] = current_page
+    with control_cols[3]:
+        label = selected_range if since_days is not None else "all time"
+        st.caption(f"Showing **{total_tasks}** tasks across **{page_count}** page(s) for **{label}**.")
+
+    offset = (current_page - 1) * page_size
+    use_fragment = getattr(st, "fragment", None) is not None
+    if use_fragment:
+        try:
+
+            @st.fragment(run_every=timedelta(seconds=3))
+            def _task_list_poll():
+                current_tasks = list_recent_tasks(
+                    limit=page_size,
+                    offset=offset,
+                    session_id=current_user,
+                    since_days=since_days,
+                )
+                render_task_list(current_tasks, current_user)
+
+            _task_list_poll()
+            return
+        except (TypeError, AttributeError):
+            use_fragment = False
+
+    tasks = list_recent_tasks(
+        limit=page_size,
+        offset=offset,
+        session_id=current_user,
+        since_days=since_days,
+    )
+    has_active = render_task_list(tasks, current_user)
+    if st.button("Refresh tasks", key="workflow_refresh_tasks"):
+        st.rerun()
+    if has_active:
+        st.caption("Active jobs are shown live when possible. Use refresh if this browser does not support fragments.")
+
+
+def _get_start_workflow_defaults() -> Dict[str, object]:
+    default_target = get_config_value("target_name", "beta/v4.3.2")
+    return {
+        "project_id": get_config_value("eval_project_id", "x2_dev"),
+        "environment": get_config_value("environment", ""),
+        "output_path_default": _make_default_output_path(default_target),
+        "download_type_default": get_config_value("eval_download_type", "Archives (ZIP)"),
+        "phase_default": get_config_value(
+            "eval_phase",
+            "perception.object_recognition.tracking.objects",
+        ),
+        "skip_large_file_default": True,
+        "large_file_mb_default": 50.0,
+        "keep_zip_files_default": False,
+    }
+
+
+def _render_start_workflow_form(
+    catalog_presets: List[Dict[str, str]],
+    catalogs_path: Optional[str],
+    catalog_load_error: Optional[str],
+) -> Dict[str, object]:
+    if catalog_load_error:
+        st.warning(f"Could not read catalog presets: {catalog_load_error}")
+    elif catalogs_path:
+        st.caption(f"Catalog presets loaded from `{catalogs_path}`.")
+
+    catalog_names = [item["display_name"] for item in catalog_presets]
+    default_project = get_config_value("eval_project_id", "x2_dev")
+    default_target = get_config_value("target_name", "beta/v4.3.2")
+    default_download_type = get_config_value("eval_download_type", "Archives (ZIP)")
+    default_phase = get_config_value(
+        "eval_phase",
+        "perception.object_recognition.tracking.objects",
+    )
+    default_poll_interval = int(get_config_value("poll_interval", 60))
+    default_max_wait_hours = int(get_config_value("max_wait_hours", 24))
+    default_environment = get_config_value("environment", "")
+    default_output = _make_default_output_path(default_target)
+    default_skip_large_file = True
+
+    if "workflow_server_catalogs" not in st.session_state:
+        st.session_state["workflow_server_catalogs"] = []
+    if "workflow_server_catalog_error" not in st.session_state:
+        st.session_state["workflow_server_catalog_error"] = ""
+    if "workflow_selected_server_catalog_id" not in st.session_state:
+        st.session_state["workflow_selected_server_catalog_id"] = ""
+    if "workflow_catalog_id" not in st.session_state:
+        st.session_state["workflow_catalog_id"] = ""
+    if "workflow_integration_id" not in st.session_state:
+        st.session_state["workflow_integration_id"] = ""
+    if "workflow_catalog_resolution_error" not in st.session_state:
+        st.session_state["workflow_catalog_resolution_error"] = ""
+    if "workflow_last_catalog_selection" not in st.session_state:
+        st.session_state["workflow_last_catalog_selection"] = ""
+
+    server_catalogs = st.session_state.get("workflow_server_catalogs", []) or []
+    server_catalog_labels = [
+        f"{item['display_name']} ({item['catalog_id']})" for item in server_catalogs
+    ]
+    catalog_options = [""] + catalog_names + [
+        label for label in server_catalog_labels if label not in catalog_names
+    ]
+    preset_by_label = {item["display_name"]: item for item in catalog_presets}
+    server_by_label = {
+        f"{item['display_name']} ({item['catalog_id']})": item for item in server_catalogs
+    }
+
+    release_mode = st.checkbox(
+        "Release data workflow: schedule Performance Test + Devops Test",
+        value=bool(st.session_state.get("workflow_release_mode", False)),
+        key="workflow_release_mode",
+        help="Queues the two standard release evaluator jobs, processes both as normal app runs, then generates a release specsheet with trend data.",
+    )
+    if release_mode:
+        st.info(
+            "Release mode uses the app-native flow: schedule Performance Test and Devops Test, create normal CSV/parquet analysis folders, write release metadata, and generate the trend-enabled specsheet PDF."
+        )
+
+    top_cols = st.columns([1.0, 1.9, 1.2])
+    with top_cols[0]:
+        st.markdown('<div class="wf-toolbar-note">Project</div>', unsafe_allow_html=True)
+        project_id = st.text_input(
+            "Project ID",
+            value=default_project,
+            key="workflow_project_id",
+            label_visibility="collapsed",
+        ).strip()
+    with top_cols[1]:
+        st.markdown('<div class="wf-toolbar-note">Catalog</div>', unsafe_allow_html=True)
+        catalog_picker_cols = st.columns([4.2, 1.1], gap="small")
+        with catalog_picker_cols[0]:
+            selected_catalog_name = st.selectbox(
+                "Catalog",
+                options=catalog_options if catalog_options else [""],
+                index=catalog_options.index(st.session_state.get("workflow_catalog_name", "")) if st.session_state.get("workflow_catalog_name", "") in catalog_options else 0,
+                key="workflow_catalog_name",
+                label_visibility="collapsed",
+                format_func=lambda value: value or "Choose a catalog",
+                disabled=release_mode,
+            )
+        with catalog_picker_cols[1]:
+            fetch_catalogs_clicked = st.button(
+                "Fetch",
+                key="workflow_fetch_server_catalogs",
+                use_container_width=True,
+                disabled=release_mode,
+            )
+            if fetch_catalogs_clicked:
+                try:
+                    current_environment = str(st.session_state.get("workflow_environment", default_environment) or "")
+                    st.session_state["workflow_server_catalogs"] = _fetch_server_catalogs(project_id, current_environment)
+                    st.session_state["workflow_server_catalog_error"] = ""
+                except Exception as exc:
+                    st.session_state["workflow_server_catalogs"] = []
+                    st.session_state["workflow_server_catalog_error"] = str(exc)
+    selected_catalog = preset_by_label.get(selected_catalog_name)
+    selected_server_catalog = server_by_label.get(selected_catalog_name)
+    if "workflow_last_catalog_preset" not in st.session_state:
+        st.session_state["workflow_last_catalog_preset"] = ""
+    if st.session_state["workflow_last_catalog_preset"] != selected_catalog_name and selected_catalog:
+        st.session_state["workflow_catalog_id"] = str(selected_catalog.get("catalog_id") or "")
+        st.session_state["workflow_integration_id"] = str(selected_catalog.get("integration_id") or "")
+        st.session_state["workflow_selected_server_catalog_id"] = ""
+        st.session_state["workflow_catalog_resolution_error"] = ""
+        st.session_state["workflow_last_catalog_preset"] = selected_catalog_name
+    elif selected_server_catalog:
+        st.session_state["workflow_catalog_id"] = str(selected_server_catalog.get("catalog_id") or "")
+        st.session_state["workflow_selected_server_catalog_id"] = str(selected_server_catalog.get("catalog_id") or "")
+        current_environment = str(st.session_state.get("workflow_environment", default_environment) or "")
+        if st.session_state["workflow_last_catalog_selection"] != selected_catalog_name:
+            try:
+                st.session_state["workflow_integration_id"] = _resolve_integration_id_for_catalog(
+                    project_id,
+                    current_environment,
+                    st.session_state["workflow_catalog_id"],
+                )
+                st.session_state["workflow_catalog_resolution_error"] = ""
+            except Exception as exc:
+                st.session_state["workflow_integration_id"] = ""
+                st.session_state["workflow_catalog_resolution_error"] = str(exc)
+            st.session_state["workflow_last_catalog_selection"] = selected_catalog_name
+    elif st.session_state["workflow_last_catalog_selection"] != selected_catalog_name:
+        st.session_state["workflow_catalog_resolution_error"] = ""
+        st.session_state["workflow_last_catalog_selection"] = selected_catalog_name
+    with top_cols[2]:
+        st.markdown('<div class="wf-toolbar-note">Branch or tag</div>', unsafe_allow_html=True)
+        target_name = st.text_input(
+            "Branch or Tag",
+            value=default_target,
+            key="workflow_target_name",
+            label_visibility="collapsed",
+            placeholder="beta/v4.3.2",
+        ).strip()
+
+    catalog_id = str(st.session_state.get("workflow_catalog_id") or "").strip()
+    integration_id = str(st.session_state.get("workflow_integration_id") or "").strip()
+
+    if st.session_state.get("workflow_server_catalog_error"):
+        st.warning(f"Could not fetch catalogs: {st.session_state['workflow_server_catalog_error']}")
+    catalog_id = str(st.session_state.get("workflow_catalog_id") or "").strip()
+
+    picker_cols = st.columns([1.2, 1.2, 1.75])
+    with picker_cols[0]:
+        st.markdown(
+            f'<div class="wf-toolbar-note">{"Release output folder" if release_mode else "Output folder"}</div>',
+            unsafe_allow_html=True,
+        )
+        output_path = st.text_input(
+            "Release output folder" if release_mode else "Output folder",
+            value=default_output,
+            key="workflow_output_path",
+            label_visibility="collapsed",
+            placeholder=_make_default_output_path(target_name),
+            help=(
+                "Folder under data/. Release mode creates metadata.yaml, performance/, devops/, and specsheet/ in this single folder."
+                if release_mode
+                else "Output folder under the data directory."
+            ),
+        ).strip()
+    with picker_cols[1]:
+        st.markdown('<div class="wf-toolbar-note">Phase</div>', unsafe_allow_html=True)
+        phase_value = "perception.object_recognition.tracking.objects" if release_mode else default_phase
+        phase = st.text_input(
+            "Phase",
+            value=phase_value,
+            key="workflow_phase",
+            label_visibility="collapsed",
+            disabled=release_mode,
+            help=(
+                "Release mode uses this standard phase automatically for both detailed-analysis downloads."
+                if release_mode
+                else None
+            ),
+        )
+    with picker_cols[2]:
+        st.markdown('<div class="wf-toolbar-note">Description</div>', unsafe_allow_html=True)
+        description = st.text_input(
+            "Description",
+            value=get_config_value("workflow_description", ""),
+            key="workflow_description",
+            label_visibility="collapsed",
+            placeholder="Optional label for the evaluator run",
+        ).strip()
+
+    trend_metadata: Dict[str, object] = {}
+    if release_mode:
+        metadata_default_key = "workflow_release_metadata_default_target"
+        metadata_text_key = "workflow_release_metadata_text"
+        if (
+            st.session_state.get(metadata_default_key) != target_name
+            or metadata_text_key not in st.session_state
+        ):
+            st.session_state[metadata_text_key] = _make_default_release_metadata_text(target_name)
+            st.session_state[metadata_default_key] = target_name
+
+        current_metadata_text = str(st.session_state.get(metadata_text_key) or "")
+        trend_topic_from_metadata = _extract_release_metadata_topic(current_metadata_text)
+        option_values = list(_RELEASE_TREND_TOPIC_OPTIONS.values())
+        topic_labels = list(_RELEASE_TREND_TOPIC_OPTIONS.keys())
+        if trend_topic_from_metadata in option_values:
+            topic_index = option_values.index(trend_topic_from_metadata)
+        else:
+            topic_index = topic_labels.index("Custom")
+            st.session_state.setdefault("workflow_release_custom_trend_topic", trend_topic_from_metadata)
+
+        topic_label_key = "workflow_release_trend_topic_label"
+        topic_yaml_key = "workflow_release_trend_topic_yaml_value"
+        if st.session_state.get(topic_yaml_key) != trend_topic_from_metadata:
+            st.session_state[topic_label_key] = topic_labels[topic_index]
+            st.session_state[topic_yaml_key] = trend_topic_from_metadata
+            if topic_labels[topic_index] == "Custom":
+                st.session_state["workflow_release_custom_trend_topic"] = trend_topic_from_metadata
+
+        trend_topic_label = st.selectbox(
+            "Trend topic",
+            options=topic_labels,
+            key=topic_label_key,
+            help="Used only for trend graphs. The specsheet data topic is detected from parquet/csv separately.",
+        )
+        if trend_topic_label == "Custom":
+            trend_topic = st.text_input(
+                "Custom trend topic",
+                value=st.session_state.get("workflow_release_custom_trend_topic", trend_topic_from_metadata),
+                key="workflow_release_custom_trend_topic",
+                placeholder="perception.object_recognition.objects",
+            ).strip()
+        else:
+            trend_topic = _RELEASE_TREND_TOPIC_OPTIONS[trend_topic_label]
+        if trend_topic and trend_topic != trend_topic_from_metadata:
+            st.session_state[metadata_text_key] = _replace_release_metadata_topic(
+                current_metadata_text,
+                trend_topic,
+            )
+            st.session_state[topic_yaml_key] = trend_topic
+
+        metadata_text = st.text_area(
+            "Release metadata YAML",
+            key=metadata_text_key,
+            height=150,
+            help=(
+                "Required: tags: [trend], release_group, pilot_auto_version, data_count, description, date. "
+                "date must look like 2026.5.22."
+            ),
+        )
+        metadata_error = ""
+        try:
+            trend_metadata = parse_trend_metadata_text(metadata_text)
+            if not str(trend_metadata.get("release_group") or "").strip():
+                raise ValueError("Release metadata requires non-empty `release_group`.")
+        except Exception as exc:
+            metadata_error = str(exc)
+            trend_metadata = {}
+            st.error(f"Release metadata error: {metadata_error}")
+
+        trend_topic_from_metadata = str(trend_metadata.get("topic_name") or "").strip()
+        if release_mode and trend_metadata and not trend_topic_from_metadata:
+            metadata_error = metadata_error or "Trend topic is required."
+            st.error("Trend topic is required.")
+        elif trend_metadata:
+            st.success("Release metadata looks valid.")
+
+        optional_catalog_enabled = st.checkbox(
+            "Also run Planning Test catalog",
+            value=bool(st.session_state.get("workflow_release_optional_catalog_enabled", False)),
+            key="workflow_release_optional_catalog_enabled",
+            help="Schedules the Planning Test catalog in addition to Performance and DevOps.",
+        )
+        existing_job_cols = st.columns(2)
+        with existing_job_cols[0]:
+            performance_job_id = st.text_input(
+                "Existing Performance job ID",
+                value=st.session_state.get("workflow_release_performance_job_id", ""),
+                key="workflow_release_performance_job_id",
+                placeholder="Leave empty to schedule a new Performance job",
+                help="Use this when the release Performance evaluator job is already scheduled or finished.",
+            ).strip()
+        with existing_job_cols[1]:
+            devops_job_id = st.text_input(
+                "Existing DevOps job ID",
+                value=st.session_state.get("workflow_release_devops_job_id", ""),
+                key="workflow_release_devops_job_id",
+                placeholder="Leave empty to schedule a new DevOps job",
+                help="Use this when the release DevOps evaluator job is already scheduled or finished.",
+            ).strip()
+        if optional_catalog_enabled:
+            optional_job_id = st.text_input(
+                "Existing Planning Test job ID",
+                value=st.session_state.get("workflow_release_optional_job_id", ""),
+                key="workflow_release_optional_job_id",
+                placeholder="Leave empty to schedule the Planning Test catalog",
+                help="Use this when the Planning Test evaluator job is already scheduled or finished.",
+            ).strip()
+        else:
+            optional_job_id = ""
+        output_dirs = "`performance/`, `devops/`, and `planning_test/`" if optional_catalog_enabled else "`performance/` and `devops/`"
+        st.caption(
+            f"Normal detailed-analysis outputs are generated automatically under {output_dirs}; existing job IDs are waited on if still running and downloaded if already finished."
+        )
+    else:
+        performance_job_id = ""
+        devops_job_id = ""
+        optional_catalog_enabled = False
+        optional_job_id = ""
+        metadata_error = ""
+
+    confirm_cols = st.columns([1.0, 1.0, 1.0] if release_mode and optional_catalog_enabled else [1.0, 1.0])
+    with confirm_cols[0]:
+        if release_mode:
+            st.caption(f"Performance catalog: `{_RELEASE_PERFORMANCE_CATALOG_ID}`")
+        elif catalog_id:
+            st.caption(f"Catalog ID: `{catalog_id}`")
+    with confirm_cols[1]:
+        if release_mode:
+            st.caption(f"DevOps catalog: `{_RELEASE_DEVOPS_CATALOG_ID}`")
+        elif integration_id:
+            st.caption(f"Integration ID: `{integration_id}`")
+    if release_mode and optional_catalog_enabled:
+        with confirm_cols[2]:
+            st.caption(f"Planning Test catalog: `{_RELEASE_OPTIONAL_CATALOG_ID}`")
+    if st.session_state.get("workflow_catalog_resolution_error"):
+        st.warning(f"Could not resolve integration automatically: {st.session_state['workflow_catalog_resolution_error']}")
+
+    if selected_catalog:
+        desc = str(selected_catalog.get("description") or "").strip() or "Preset selected for quick scheduling."
+        st.caption(f"Preset: {desc}")
+    elif selected_server_catalog:
+        desc = str(selected_server_catalog.get("description") or "").strip()
+        if desc:
+            st.caption(f"Fetched catalog: {desc}")
+
+    with st.expander("Advanced options", expanded=False):
+        adv_cols = st.columns([1.0, 1.0, 0.8, 0.8])
+        with adv_cols[0]:
+            download_type = st.radio(
+                "Download type",
+                ["Archives (ZIP)", "Result JSON"],
+                horizontal=True,
+                index=0 if default_download_type == "Archives (ZIP)" else 1,
+                key="workflow_download_type",
+                disabled=release_mode,
+                help=(
+                    "Release mode uses archives, but reuses existing downloaded artifacts when the output folders already contain them."
+                    if release_mode
+                    else None
+                ),
+            )
+        with adv_cols[1]:
+            environment = st.selectbox(
+                "Environment",
+                options=["", "dev", "stg", "prd"],
+                index=["", "dev", "stg", "prd"].index(default_environment) if default_environment in ("", "dev", "stg", "prd") else 0,
+                key="workflow_environment",
+                format_func=lambda value: value or "default",
+            )
+        with adv_cols[2]:
+            poll_interval = st.slider(
+                "Poll interval (s)",
+                min_value=10,
+                max_value=300,
+                value=default_poll_interval,
+                step=10,
+                key="workflow_poll_interval",
+            )
+        with adv_cols[3]:
+            max_wait_hours = st.slider(
+                "Max wait (h)",
+                min_value=1,
+                max_value=168,
+                value=default_max_wait_hours,
+                key="workflow_max_wait_hours",
+            )
+
+        option_cols = st.columns(5)
+        with option_cols[0]:
+            run_eval = st.checkbox(
+                "Run evaluation",
+                value=False if release_mode else True,
+                key="workflow_run_eval",
+                disabled=release_mode,
+                help="Release PDF generation uses parquet; eval/CSV detail checks can be run separately when needed.",
+            )
+        with option_cols[1]:
+            generate_parquet = st.checkbox(
+                "Generate parquet",
+                value=False if release_mode else CATALOG_IO_AVAILABLE,
+                disabled=release_mode or not CATALOG_IO_AVAILABLE,
+                key="workflow_generate_parquet",
+                help="Release mode generates parquet when missing; existing parquet is enough for PDF generation.",
+            )
+        with option_cols[2]:
+            skip_large_file = st.checkbox(
+                "Skip large files",
+                value=_RELEASE_SKIP_LARGE_FILE if release_mode else default_skip_large_file,
+                key="workflow_skip_large_file",
+                disabled=release_mode,
+                help=(
+                    f"Release mode always skips archives at or above {_RELEASE_LARGE_FILE_MB:g} MB."
+                    if release_mode
+                    else "Skip unusually large archives during download."
+                ),
+            )
+        with option_cols[3]:
+            eval_recursive = st.checkbox(
+                "Recursive scan",
+                value=False if release_mode else True,
+                key="workflow_eval_recursive",
+                disabled=release_mode,
+                help="Not used in release mode.",
+            )
+        with option_cols[4]:
+            is_tag = st.checkbox("Target is tag", value=False, key="workflow_is_tag")
+
+    set_config_value("eval_project_id", project_id)
+    set_config_value("target_name", target_name)
+    if not release_mode:
+        set_config_value("eval_download_type", download_type)
+        set_config_value("eval_phase", phase)
+    set_config_value("poll_interval", poll_interval)
+    set_config_value("max_wait_hours", max_wait_hours)
+    set_config_value("environment", environment)
+    set_config_value("workflow_description", description)
+    errors = []
+    if not project_id:
+        errors.append("Project ID")
+    if not release_mode and not catalog_id:
+        errors.append("Catalog")
+    if not release_mode and not integration_id:
+        errors.append("Integration ID")
+    if not target_name:
+        errors.append("Branch or tag")
+    if release_mode:
+        if not trend_metadata.get("release_group"):
+            errors.append("Release group")
+        if not trend_metadata.get("pilot_auto_version"):
+            errors.append("Pilot.Auto version")
+        if not trend_metadata.get("data_count"):
+            errors.append("Data count")
+        if not trend_metadata.get("date"):
+            errors.append("Release date")
+        if metadata_error:
+            errors.append(metadata_error)
+
+    resolved_output = None
+    path_error = ""
+    if output_path:
+        resolved_output, path_error = resolve_under_data_root(output_path, allow_missing=True)
+        if path_error:
+            errors.append(path_error)
+    else:
+        errors.append("Output folder")
+
+    return {
+        "project_id": project_id,
+        "environment": environment,
+        "output_path_default": output_path or _make_default_output_path(target_name),
+        "download_type_default": download_type,
+        "phase_default": phase,
+        "skip_large_file_default": True,
+        "large_file_mb_default": 50.0,
+        "keep_zip_files_default": False,
+        "dialog_payload": {
+            "errors": errors,
+            "project_id": project_id,
+            "catalog_id": catalog_id,
+            "integration_id": integration_id,
+            "catalog_preset_name": selected_catalog_name,
+            "has_custom_catalog": bool(catalog_id and not selected_catalog),
+            "target_name": target_name,
+            "description": description,
+            "resolved_output": str(resolved_output) if resolved_output else "",
+            "environment": environment,
+            "is_tag": is_tag,
+            "download_type": download_type,
+            "phase": phase,
+            "poll_interval": int(poll_interval),
+            "max_wait_hours": int(max_wait_hours),
+            "run_eval": False if release_mode else bool(run_eval),
+            "generate_parquet": False if release_mode else bool(generate_parquet),
+            "skip_large_file": _RELEASE_SKIP_LARGE_FILE if release_mode else bool(skip_large_file),
+            "eval_recursive": False if release_mode else bool(eval_recursive),
+            "release_mode": bool(release_mode),
+            "trend_metadata": trend_metadata if release_mode else {},
+            "performance_job_id": performance_job_id if release_mode else "",
+            "devops_job_id": devops_job_id if release_mode else "",
+            "optional_catalog_enabled": bool(optional_catalog_enabled) if release_mode else False,
+            "optional_catalog_id": _RELEASE_OPTIONAL_CATALOG_ID if release_mode and optional_catalog_enabled else "",
+            "optional_job_id": optional_job_id if release_mode and optional_catalog_enabled else "",
+        },
+    }
+
+
+def _render_workflow_launcher_section(
+    catalog_presets: List[Dict[str, str]],
+    catalogs_path: Optional[str],
+    catalog_load_error: Optional[str],
+) -> Dict[str, object]:
+    """Render the "Run Evaluator Workflow" launcher and queue the requested task.
+
+    Draws the "Start new workflow" button and, while the
+    ``workflow_start_dialog_open`` session flag is set, the start form (inside
+    ``st.dialog`` when Streamlit provides it, otherwise inline). A valid
+    submission enqueues ``run_release_specsheet_workflow`` in release mode and
+    ``run_evaluator_and_process`` otherwise.
+
+    Args:
+        catalog_presets: Forwarded unchanged to ``_render_start_workflow_form``.
+        catalogs_path: Forwarded unchanged to ``_render_start_workflow_form``.
+        catalog_load_error: Forwarded unchanged to ``_render_start_workflow_form``.
+
+    Returns:
+        The mapping returned by ``_get_start_workflow_defaults()``.
+    """
+    section_header("Run Evaluator Workflow", "")
+    start_defaults = _get_start_workflow_defaults()
+    st.session_state.setdefault("workflow_start_dialog_open", False)
+    new_job_clicked = st.button(
+        "Start new workflow",
+        key="workflow_open_start_dialog",
+        type="primary",
+        use_container_width=False,
+    )
+
+    def _reset_start_workflow_state() -> None:
+        """Reset per-launch session state so a new dialog starts from defaults."""
+        fresh_target = str(get_config_value("target_name", "beta/v4.3.2") or "beta/v4.3.2")
+        for state_key in (
+            "workflow_catalog_name",
+            "workflow_last_catalog_preset",
+            "workflow_catalog_id",
+            "workflow_integration_id",
+            "workflow_server_catalog_error",
+            "workflow_selected_server_catalog_id",
+            "workflow_selected_server_catalog_label",
+            "workflow_catalog_resolution_error",
+            "workflow_last_catalog_selection",
+            "workflow_release_performance_job_id",
+            "workflow_release_devops_job_id",
+            # Cleared together with the two job IDs above so that a previous
+            # Planning Test job ID is never carried into a fresh launch.
+            "workflow_release_optional_job_id",
+            "workflow_release_custom_trend_topic",
+        ):
+            st.session_state[state_key] = ""
+        st.session_state["workflow_server_catalogs"] = []
+        st.session_state["workflow_release_trend_topic_label"] = "Prediction / object recognition"
+        st.session_state["workflow_output_path"] = _make_default_output_path(fresh_target)
+
+    def _finish_enqueue(task_id: object, queued_prefix: str, failure_message: str) -> None:
+        """Close the dialog and rerun when a task was queued; otherwise show the failure."""
+        if not task_id:
+            st.error(failure_message)
+            return
+        st.session_state["workflow_start_dialog_open"] = False
+        # NOTE(review): st.rerun() restarts the script right away, so this
+        # message may never become visible to the user -- confirm.
+        st.success(f"{queued_prefix} Task id: `{task_id}`")
+        st.rerun()
+
+    def _enqueue_release_workflow(dialog_payload: Dict[str, object]) -> None:
+        """Queue ``run_release_specsheet_workflow`` using the fixed release catalog IDs."""
+        base_description = dialog_payload["description"] or _make_auto_release_workflow_description(
+            dialog_payload["target_name"]
+        )
+        trend_metadata = dict(dialog_payload.get("trend_metadata") or {})
+        release_params = {
+            "project_id": dialog_payload["project_id"],
+            "target_name": dialog_payload["target_name"],
+            "description": base_description,
+            "output_path": dialog_payload["resolved_output"],
+            "environment": dialog_payload["environment"],
+            "is_tag": dialog_payload["is_tag"],
+            "poll_interval": dialog_payload["poll_interval"],
+            "max_wait_seconds": dialog_payload["max_wait_hours"] * 3600,
+            "trend_metadata": trend_metadata,
+            "version": trend_metadata.get("pilot_auto_version", ""),
+            "topic": trend_metadata.get("topic_name", ""),
+            "performance_catalog_id": _RELEASE_PERFORMANCE_CATALOG_ID,
+            "performance_integration_id": _RELEASE_PERFORMANCE_INTEGRATION_ID,
+            "performance_job_id": dialog_payload.get("performance_job_id", ""),
+            "devops_catalog_id": _RELEASE_DEVOPS_CATALOG_ID,
+            "devops_integration_id": _RELEASE_DEVOPS_INTEGRATION_ID,
+            "devops_job_id": dialog_payload.get("devops_job_id", ""),
+            "optional_catalog_enabled": bool(dialog_payload.get("optional_catalog_enabled", False)),
+            "optional_catalog_id": dialog_payload.get("optional_catalog_id", ""),
+            "optional_job_id": dialog_payload.get("optional_job_id", ""),
+            "analysis_phase": "perception.object_recognition.tracking.objects",
+            "skip_large_file": _RELEASE_SKIP_LARGE_FILE,
+            "large_file_mb": _RELEASE_LARGE_FILE_MB,
+            "run_eval": bool(dialog_payload.get("run_eval", False)),
+            "overwrite": True,
+        }
+        _finish_enqueue(
+            _enqueue_task("run_release_specsheet_workflow", release_params),
+            "Release specsheet workflow queued.",
+            "Failed to enqueue release specsheet workflow. Check worker logs.",
+        )
+
+    def _enqueue_standard_workflow(dialog_payload: Dict[str, object]) -> None:
+        """Queue ``run_evaluator_and_process`` for the catalog chosen in the form."""
+        preset_name = dialog_payload.get("catalog_preset_name", "")
+        description = dialog_payload["description"] or _make_auto_workflow_description(
+            dialog_payload["target_name"],
+            preset_name,
+            has_custom_catalog=bool(dialog_payload.get("has_custom_catalog", False)),
+        )
+        uses_archives = dialog_payload["download_type"] == "Archives (ZIP)"
+        workflow_params = {
+            "project_id": dialog_payload["project_id"],
+            "suite_ids": None,
+            "target_name": dialog_payload["target_name"],
+            "environment": dialog_payload["environment"],
+            "max_retries": 0,
+            "clean_build": False,
+            "debug": False,
+            "release": False,
+            "record_caret": False,
+            "log_expiration_time_in_days": 14.0,
+            "is_tag": dialog_payload["is_tag"],
+            "download_type": "archives" if uses_archives else "result_json",
+            "phase": dialog_payload["phase"],
+            "skip_large_file": bool(dialog_payload.get("skip_large_file", True)),
+            "large_file_mb": 50.0,
+            "keep_zip_files": False,
+            "poll_interval": dialog_payload["poll_interval"],
+            "max_wait_seconds": dialog_payload["max_wait_hours"] * 3600,
+            "run_eval": dialog_payload["run_eval"],
+            "generate_parquet": dialog_payload["generate_parquet"],
+            "eval_recursive": dialog_payload["eval_recursive"],
+            "eval_overwrite": False,
+            "catalog_id": dialog_payload["catalog_id"],
+            "integration_id": dialog_payload["integration_id"],
+            "catalog_preset_name": preset_name,
+            "description": description,
+            "output_path": dialog_payload["resolved_output"],
+        }
+        _finish_enqueue(
+            _enqueue_task("run_evaluator_and_process", workflow_params),
+            "Workflow queued.",
+            "Failed to enqueue task. Check worker logs.",
+        )
+
+    def _render_start_workflow_controls(*, key_suffix: str = "dialog") -> None:
+        """Render the start form with its Close/Start buttons and handle both clicks.
+
+        ``key_suffix`` is appended to the two button keys so the dialog and the
+        inline fallback use distinct widget keys.
+        """
+        st.caption("This is the full launcher for creating a new evaluator job, downloading results, and optionally running eval/parquet.")
+        payload = _render_start_workflow_form(catalog_presets, catalogs_path, catalog_load_error)
+        submit_cols = st.columns([1.15, 1.15, 3.7])
+        close_clicked = submit_cols[0].button(
+            "Close",
+            key=f"workflow_close_start_{key_suffix}",
+            use_container_width=True,
+        )
+        start_clicked = submit_cols[1].button(
+            "Start workflow",
+            key=f"workflow_start_btn_{key_suffix}",
+            type="primary",
+            use_container_width=True,
+        )
+        if close_clicked:
+            st.session_state["workflow_start_dialog_open"] = False
+            st.rerun()
+        if not start_clicked:
+            return
+
+        dialog_payload = dict(payload.get("dialog_payload") or {})
+        errors = dialog_payload.get("errors", [])
+        if errors:
+            # Validation problems collected by the form block the submission.
+            for err in errors:
+                st.error(f"Missing or invalid: {err}")
+            return
+        if not is_task_queue_enabled():
+            st.error("Task queue not enabled. Set `USE_TASK_QUEUE=true` and `REDIS_URL`.")
+            return
+        if dialog_payload.get("release_mode"):
+            _enqueue_release_workflow(dialog_payload)
+        else:
+            _enqueue_standard_workflow(dialog_payload)
+
+    if new_job_clicked:
+        st.session_state["workflow_start_dialog_open"] = True
+        _reset_start_workflow_state()
+
+    if st.session_state.get("workflow_start_dialog_open"):
+        if callable(getattr(st, "dialog", None)):
+            @st.dialog("Start evaluator workflow", width="large")
+            def _workflow_start_dialog() -> None:
+                _render_start_workflow_controls(key_suffix="dialog")
+
+            _workflow_start_dialog()
+        else:
+            # Streamlit builds without st.dialog get the same controls inline.
+            st.markdown("---")
+            st.subheader("Start evaluator workflow")
+            _render_start_workflow_controls(key_suffix="inline")
+
+    return start_defaults
+
+
+# Page entry point: inject the page styles, draw the hero banner, then lay out both tabs.
+_inject_workflow_page_styles()
+render_page_hero(
+    kicker="Workflow automation",
+    title="Evaluator Workflow",
+    description="Browse finished runs, watch background tasks, launch fresh evaluator pipelines, and reuse existing evaluator reports from one aligned workspace.",
+)
+
+catalog_presets, catalogs_path, catalog_load_error = _load_catalog_presets()
+
+tab_tasks, tab_local = st.tabs(["Run Tasks", "Local Runs"])
+
+with tab_tasks:
+    _render_current_tasks_section()
+    start_defaults = _render_workflow_launcher_section(catalog_presets, catalogs_path, catalog_load_error)
+
+    # The launcher's defaults also seed the recent-evaluator-jobs panel below it.
+    _recent_jobs_environment = str(start_defaults["environment"] or "")
+    configure_recent_evaluator_jobs_ui(
+        get_config_value=get_config_value,
+        set_config_value=set_config_value,
+        enqueue_task=_enqueue_task,
+        catalog_io_available=CATALOG_IO_AVAILABLE,
+        environment=_recent_jobs_environment,
+    )
+
+    _recent_jobs_defaults = {
+        "output_path_default": str(start_defaults["output_path_default"]),
+        "download_type_default": str(start_defaults["download_type_default"]),
+        "phase_default": str(start_defaults["phase_default"]),
+        "skip_large_file_default": bool(start_defaults["skip_large_file_default"]),
+        "large_file_mb_default": float(start_defaults["large_file_mb_default"]),
+        "keep_zip_files_default": bool(start_defaults["keep_zip_files_default"]),
+    }
+    _render_recent_evaluator_jobs_section(
+        str(start_defaults["project_id"] or ""),
+        _recent_jobs_environment,
+        show_toggle=False,
+        default_visible=True,
+        show_title=False,
+        **_recent_jobs_defaults,
+    )
+
+with tab_local:
+    # Use st.fragment when this Streamlit build offers it; otherwise render directly.
+    use_fragment = getattr(st, "fragment", None) is not None
+    if not use_fragment:
+        _render_local_runs_section()
+    else:
+        try:
+
+            @st.fragment
+            def _local_runs_fragment():
+                _render_local_runs_section()
+
+            _local_runs_fragment()
+        except (TypeError, AttributeError):
+            # Same fallback as above when the fragment path raises one of these errors.
+            _render_local_runs_section()
diff --git a/evaluation_dashboard_app/pages/7_Data_Management.py b/evaluation_dashboard_app/pages/7_Data_Management.py
index 050089d..4b45b86 100644
--- a/evaluation_dashboard_app/pages/7_Data_Management.py
+++ b/evaluation_dashboard_app/pages/7_Data_Management.py
@@ -5,6 +5,7 @@
 
 import io
 import re
+import urllib.parse
 import zipfile
 import streamlit as st
 from pathlib import Path
@@ -79,9 +80,10 @@
             key="share_run_b",
         )
 mode = "compare" if share_compare and share_run_b else "single"
-q = f"mode={mode}&run_a={share_run_a}"
+query = {"mode": mode, "run_a": share_run_a}
 if mode == "compare":
-    q += f"&run_b={share_run_b}"
+    query["run_b"] = share_run_b
+q = urllib.parse.urlencode(query)
 st.code(q, language=None)
 st.caption("Example: `https://your-server:8501/?` + the query above.")
 
diff --git a/evaluation_dashboard_app/pages/99_Deployment_Debug.py b/evaluation_dashboard_app/pages/99_Deployment_Debug.py
index a85b1fb..a46d093 100644
--- a/evaluation_dashboard_app/pages/99_Deployment_Debug.py
+++ b/evaluation_dashboard_app/pages/99_Deployment_Debug.py
@@ -4,18 +4,23 @@
 Must live as a top-level pages/*.py file so st.page_link can resolve it. Outside Docker, the default
 sidebar entry is hidden via CSS in lib/ui/styles_global.py; Overview shows a page_link only in Docker.
 """
+import json
 import os
-from datetime import timedelta
+from datetime import datetime, timedelta
+from typing import Any
 
 import pandas as pd
 import streamlit as st
 
+from lib.db import TASK_STATUSES, TASK_TYPES
 from lib.deploy_debug import (
     EXEC_TIMEOUT_SEC,
     MAX_LOG_TAIL_LINES,
     compose_project_filter,
     container_exec_command,
     container_logs_tail,
+    database_recent_task_rows,
+    database_table_overview,
     docker_client_or_none,
     is_docker_debug_enabled,
     is_exec_enabled,
@@ -27,7 +32,14 @@
     running_in_docker,
     task_counts_by_status,
 )
+from lib.docker_live_structure import live_containers_mermaid, rowset_has_t4_compose_service
+from lib.mermaid_render import render_mermaid
 from lib.page_chrome import inject_app_page_styles, render_page_hero, section_header
+from lib.t4_visualizer_client import (
+    ENV_BASE_URL as T4_ENV_BASE_URL,
+    T4VisualizerClient,
+    T4VisualizerError,
+)
 
 st.set_page_config(
     layout="wide",
@@ -46,19 +58,20 @@
     description=(
         "Check Postgres, Redis, and the RQ queue; inspect redacted environment variables; "
         "optionally list containers and tail logs when Docker socket access is enabled; "
-        "optional one-shot shell commands when `EVAL_DEPLOYMENT_DEBUG_EXEC=1`."
+        "optional one-shot shell commands when `EVAL_DEPLOYMENT_DEBUG_EXEC=1`. "
+        "The Docker tab’s live diagram includes the T4 dataset server (HTTP 2D/3D rendering) when configured."
     ),
     mode="Single Run",
 )
 
-tab_env, tab_dep, tab_tasks, tab_docker = st.tabs(
-    ["Environment", "Dependencies", "Tasks", "Docker"]
+tab_env, tab_dep, tab_tasks, tab_db, tab_docker = st.tabs(
+    ["Environment", "Dependencies", "Tasks", "Database", "Docker"]
 )
 
 with tab_env:
     section_header("Deployment environment", "Sensitive connection strings are redacted.")
     env_df = pd.DataFrame(redacted_deployment_env_rows(), columns=["Variable", "Value"])
-    st.dataframe(env_df, use_container_width=True, hide_index=True)
+    st.dataframe(env_df, width='stretch', hide_index=True)
 
 with tab_dep:
     section_header("Postgres")
@@ -93,20 +106,169 @@
         cdf = pd.DataFrame(
             [{"status": k, "count": v} for k, v in sorted(counts.items())]
         )
-        st.dataframe(cdf, use_container_width=True, hide_index=True)
+        st.dataframe(cdf, width='stretch', hide_index=True)
     elif ok_t:
         st.success("No task rows yet (empty table).")
     else:
         st.error(msg_t)
 
 
+def _debug_json(value: Any) -> str:  # Render `value` as indented JSON text for display in st.code panels.
+    try:
+        return json.dumps(value, ensure_ascii=False, default=str, indent=2)  # default=str stringifies non-JSON types
+    except (TypeError, ValueError):
+        return str(value)  # fallback: plain str() when json.dumps still rejects the value
+
+
+def _task_rows_dataframe(rows: list) -> pd.DataFrame:  # Flatten task row mappings into the columns of the history table.
+    display_rows = []
+    for row in rows:
+        params = row.get("parameters") or {}
+        if not isinstance(params, dict):  # any non-dict `parameters` value is treated as empty
+            params = {}
+        display_rows.append(
+            {
+                "created_at": row.get("created_at"),
+                "updated_at": row.get("updated_at"),
+                "status": row.get("status"),
+                "type": row.get("type"),
+                "session_id": row.get("session_id"),
+                "id": str(row.get("id") or ""),
+                "rq_job_id": row.get("rq_job_id"),
+                "job_id": params.get("job_id")  # first truthy value among the job-id parameter keys, else ""
+                or params.get("performance_job_id")
+                or params.get("devops_job_id")
+                or params.get("source_job_id")
+                or "",
+                "output_path": params.get("output_path") or params.get("output_dir") or "",
+                "progress_pct": row.get("progress_pct"),
+                "progress_message": row.get("progress_message"),
+                "result_path": row.get("result_path"),
+                "error_message": row.get("error_message"),
+            }
+        )
+    return pd.DataFrame(display_rows)  # an empty `rows` list yields an empty DataFrame with no columns
+
+
+def _format_progress_metric(value: Any) -> str:  # Format a progress value as "<number>%"; None/falsy counts as 0.
+    try:
+        return f"{float(value or 0):g}%"  # :g drops trailing zeros, e.g. 50.0 -> "50%"
+    except (TypeError, ValueError):
+        return "0%"  # values float() cannot convert are shown as 0%
+
+
+with tab_db:  # Database tab: table overview, then a filterable/paged view of task rows with a per-row inspector.
+    section_header(
+        "Database inspector",
+        "Read-only view into Postgres tables and recent evaluator/task job history.",
+    )
+
+    ok_tables, msg_tables, table_rows = database_table_overview()
+    if ok_tables and table_rows is not None:
+        overview_df = pd.DataFrame(table_rows)
+        if not overview_df.empty:
+            overview_df["total_mb"] = (overview_df["total_bytes"] / (1024 * 1024)).round(2)  # bytes -> MiB for display
+            st.dataframe(
+                overview_df[["table_name", "estimated_rows", "total_mb"]],
+                width="stretch",
+                hide_index=True,
+            )
+        else:
+            st.info("No public tables found.")
+    elif not ok_tables:
+        st.error(msg_tables)
+
+    section_header("Recent job history", "Raw `tasks` rows, newest first, across all sessions.")
+    filters = st.columns([1.2, 1.6, 1.2, 1.2])  # relative widths: status, task type, row limit, page
+    with filters[0]:
+        status_filter = st.selectbox(
+            "Status",
+            ["All", *TASK_STATUSES],
+            key="deploy_db_status",
+        )
+    with filters[1]:
+        type_filter = st.selectbox(
+            "Task type",
+            ["All", *TASK_TYPES],
+            key="deploy_db_type",
+        )
+    with filters[2]:
+        row_limit = st.number_input(
+            "Rows",
+            min_value=10,
+            max_value=500,
+            value=50,
+            step=10,
+            key="deploy_db_limit",
+        )
+    with filters[3]:
+        page = st.number_input(
+            "Page",
+            min_value=1,
+            max_value=1000,
+            value=1,
+            step=1,
+            key="deploy_db_page",
+        )
+    search = st.text_input(
+        "Search",
+        key="deploy_db_search",
+        placeholder="Task id, job id, session, path, error text, parameters",
+    )
+
+    ok_rows, msg_rows, rows, total_rows = database_recent_task_rows(
+        limit=int(row_limit),
+        offset=(int(page) - 1) * int(row_limit),  # the Page input is 1-based
+        status=None if status_filter == "All" else status_filter,  # "All" means no status filter
+        task_type=None if type_filter == "All" else type_filter,  # "All" means no type filter
+        search=search.strip() or None,  # blank search text is passed as None
+    )
+    if not ok_rows:
+        st.error(msg_rows)
+    elif not rows:
+        st.info("No task rows matched the current filters.")
+    else:
+        st.caption(f"Showing **{len(rows)}** of **{total_rows}** matching task rows.")
+        task_df = _task_rows_dataframe(rows)
+        st.dataframe(task_df, width="stretch", hide_index=True)
+
+        id_options = [str(row.get("id") or "") for row in rows]
+        selected_id = st.selectbox("Inspect row", id_options, key="deploy_db_task_inspect")
+        selected = next((row for row in rows if str(row.get("id") or "") == selected_id), None)  # first row with that id
+        if selected:
+            meta_cols = st.columns(4)
+            meta_cols[0].metric("Status", str(selected.get("status") or "—"))
+            meta_cols[1].metric("Type", str(selected.get("type") or "—"))
+            meta_cols[2].metric("Progress", _format_progress_metric(selected.get("progress_pct")))
+            meta_cols[3].metric("Session", str(selected.get("session_id") or "—")[:32])  # truncated to 32 characters
+
+            detail_tabs = st.tabs(["Parameters", "Result summary", "Log", "Raw row"])
+            with detail_tabs[0]:
+                st.code(_debug_json(selected.get("parameters") or {}), language="json")
+            with detail_tabs[1]:
+                raw_summary = selected.get("result_summary")
+                if raw_summary:
+                    try:
+                        parsed = json.loads(raw_summary) if isinstance(raw_summary, str) else raw_summary  # str is decoded
+                        st.code(_debug_json(parsed), language="json")
+                    except (TypeError, ValueError):
+                        st.code(str(raw_summary), language=None)  # not valid JSON: show the stored text verbatim
+                else:
+                    st.info("No result summary stored for this row.")
+            with detail_tabs[2]:
+                log_text = (selected.get("log_output") or "").strip()
+                st.code(log_text or "(empty)", language=None)
+            with detail_tabs[3]:
+                st.code(_debug_json(selected), language="json")
+
+
 def _render_docker_disabled(reason: str) -> None:
     st.warning(reason)
     st.markdown(
         """
 **Enable Docker debug (trusted operators only)**
 
-1. From the `deploy/` directory, ensure `docker-compose.yml` mounts `/var/run/docker.sock` into the `streamlit` service and sets `EVAL_DEPLOYMENT_DEBUG_DOCKER=1`, then run `docker compose up -d` (or `docker compose up -d --force-recreate streamlit` after editing compose).
+1. From the `deploy/` directory, ensure `docker-compose.yml` mounts `/var/run/docker.sock` into each Streamlit service (`streamlit1`, `streamlit2`) and sets `EVAL_DEPLOYMENT_DEBUG_DOCKER=1`, then run `docker compose up -d` (or recreate those services after editing compose).
 
 2. Set `EVAL_DEPLOYMENT_DEBUG_COMPOSE_PROJECT` in `.env` to your Compose project name
    (same value as in `docker compose ls`) so the UI lists only this stack’s containers.
@@ -158,10 +320,104 @@ def _env_flag(name: str) -> bool:
     return os.environ.get(name, "").strip().lower() in ("1", "true", "yes")
 
 
-with tab_docker:
-    section_header("Containers & logs", "Requires `EVAL_DEPLOYMENT_DEBUG_DOCKER` and `/var/run/docker.sock` in the Streamlit container.")
+def _display_columns_for_containers(rows: list) -> pd.DataFrame:
+    """Build the live Docker table: preferred columns first, other columns after, `full_id` left out."""
+    df = pd.DataFrame(rows)
+    if df.empty:  # nothing to reorder; return the empty frame unchanged
+        return df
+    preferred = [
+        "name",
+        "state",
+        "health",
+        "compose_service",
+        "compose_project",
+        "image",
+        "id",
+    ]
+    cols = [c for c in preferred if c in df.columns]  # preferred columns that are actually present, in that order
+    rest = [c for c in df.columns if c not in cols and c != "full_id"]  # remaining columns, original order
+    return df[cols + rest]
+
+
+def _render_live_stack_mermaid(rows: list) -> None:
+    """Help-style Mermaid (Clients / Edge / App Tier / T4 / …) with live container labels."""
+    if not rows:  # no containers: render nothing (no caption, no diagram)
+        return
 
+    t4_env = os.environ.get("T4_VISUALIZER_BASE_URL", "").strip()
+    if t4_env:
+        st.caption(
+            "**T4 dataset server** (2D/3D): HTTP API for `/render`, `/viewer/three`, and dataset availability. "
+            f"`T4_VISUALIZER_BASE_URL` = `{t4_env}`. "
+            "The diagram shows a matching Compose service if present, otherwise a synthetic node for this URL."
+        )
+    else:
+        st.caption(
+            "**T4 dataset server** (optional): used by Bounding Box Viewer and T4 3D Viewer. "
+            "Set `T4_VISUALIZER_BASE_URL` in `.env` to include it in the diagram (synthetic node). "
+            "Compose services named `t4_server`, `t4_visualizer`, or `t4_*` are grouped under **T4 dataset server**."
+        )
+
+    # Taller when URL is set or a t4_* Compose service is present (extra subgraph).
+    t4_svc = rowset_has_t4_compose_service(rows)
+    extra_h = 120 if (t4_env or t4_svc) else 40
+    mh = min(920, 280 + 52 * len(rows) + extra_h)  # height grows with the row count and is capped at 920
+    render_mermaid(live_containers_mermaid(rows), height=mh)
+
+
+def _render_t4_remote_probe(base_url: str) -> None:
+    """Probe the T4 visualizer at `base_url`: show links, GET /health, then /server/structure.json (Mermaid + meta)."""
+    base = base_url.rstrip("/")  # avoid a double slash when building the links below
+    section_header(
+        "T4 dataset server (HTTP)",
+        f"Live probe of `{T4_ENV_BASE_URL}` — same service as Bounding Box / T4 3D pages. "
+        "Open the links on the T4 host for the server’s own HTML diagram and diagnostics.",
+    )
+    st.markdown(
+        f"**On the T4 host:** [Structure (HTML)]({base}/server/structure) · "
+        f"[structure.json]({base}/server/structure.json) · "
+        f"[Health]({base}/health) · "
+        f"[Browser diagnostics]({base}/browser/diagnostics)"
+    )
+    try:
+        client = T4VisualizerClient(base_url=base, timeout=8.0)
+        health = client.health()
+    except T4VisualizerError as ex:
+        st.warning(f"Could not reach T4 server (`GET /health`): {ex}")
+        return
+    except OSError as ex:
+        st.warning(f"Could not reach T4 server: {ex}")
+        return
+
+    st.caption("GET /health")
+    st.json(health)
+
+    try:
+        structure = client.server_structure_json()
+    except (T4VisualizerError, OSError) as ex:  # OSError handled here too, matching the /health probe above
+        if getattr(ex, "status_code", None) == 404:  # OSError has no status_code, so it takes the warning branch
+            st.info(
+                "This T4 server does not expose `/server/structure.json` yet. "
+                "Upgrade **t4-server** (evaluator_result_parser) or use the links above if the server is older."
+            )
+        else:
+            st.warning(f"`GET /server/structure.json` failed: {ex}")
+        return
+
+    mmd = structure.get("mermaid") or ""
+    if mmd:
+        st.caption("Internal architecture (returned by t4-server — same diagram as `/server/structure`)")
+        mh = min(520, 160 + mmd.count("\n") * 26)  # height grows with the diagram's line count, capped at 520
+        render_mermaid(mmd, height=mh)
+    meta = structure.get("meta")
+    if isinstance(meta, dict) and meta:  # only a non-empty dict is rendered
+        st.caption("Server meta (uptime, caches, diagnostics)")
+        st.json(meta)
+
+
+with tab_docker:
     client = docker_client_or_none()
+
     if client is None:
         if not _env_flag("EVAL_DEPLOYMENT_DEBUG_DOCKER"):
             _render_docker_disabled(
@@ -187,13 +443,6 @@ def _env_flag(name: str) -> bool:
                 )
     else:
         proj = compose_project_filter()
-        if proj:
-            st.caption(f"Filtering by Compose project label: `{proj}`")
-        else:
-            st.warning(
-                "Listing all containers on this Docker host. Set `EVAL_DEPLOYMENT_DEBUG_COMPOSE_PROJECT` in `.env` "
-                "to match `docker compose ls` and restrict the list."
-            )
 
         _use_fragment = getattr(st, "fragment", None) is not None
 
@@ -202,16 +451,24 @@ def _env_flag(name: str) -> bool:
             @st.fragment(run_every=timedelta(seconds=6))
             def _docker_fragment():
                 rows, list_warn = list_containers_for_debug(client)
+                st.caption(f"Last refreshed (server clock): **{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}** — updates about every 6 s.")
                 if list_warn and isinstance(list_warn, str) and list_warn.startswith("Docker list failed"):
                     st.error(list_warn)
                     return
                 if list_warn:
                     st.markdown(list_warn)
+                t4_probe_url = os.environ.get("T4_VISUALIZER_BASE_URL", "").strip()
                 if not rows:
                     st.info("No containers match the current filter.")
+                    if t4_probe_url:
+                        _render_t4_remote_probe(t4_probe_url)
                     return
-                display_df = pd.DataFrame(rows).drop(columns=["full_id"], errors="ignore")
-                st.dataframe(display_df, use_container_width=True, hide_index=True)
+                section_header("Live container table", "Sortable columns; `full_id` stays internal for log/exec.")
+                display_df = _display_columns_for_containers(rows)
+                st.dataframe(display_df, width='stretch', hide_index=True)
+                _render_live_stack_mermaid(rows)
+                if t4_probe_url:
+                    _render_t4_remote_probe(t4_probe_url)
 
                 options = [f"{r['name']} ({r['id']})" for r in rows]
                 id_by_label = {f"{r['name']} ({r['id']})": r["full_id"] for r in rows}
@@ -228,6 +485,7 @@ def _docker_fragment():
                 full_id = id_by_label[pick]
                 st.session_state.deploy_debug_cid = full_id
 
+                section_header("Logs", "Stdout/stderr from the selected container.")
                 tail = st.slider(
                     "Log tail (lines)",
                     min_value=50,
@@ -237,29 +495,33 @@ def _docker_fragment():
                     key="deploy_debug_tail",
                 )
                 logs = container_logs_tail(client, full_id, tail)
-                st.markdown("**Logs**")
                 st.code(logs or "(empty)", language=None)
                 _render_docker_exec_ui(client, full_id)
 
             _docker_fragment()
         else:
             rows, list_warn = list_containers_for_debug(client)
+            st.caption(f"Loaded at **{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}** — use Refresh to re-query.")
             if list_warn and isinstance(list_warn, str) and list_warn.startswith("Docker list failed"):
                 st.error(list_warn)
             elif list_warn:
                 st.markdown(list_warn)
+            t4_probe_url = os.environ.get("T4_VISUALIZER_BASE_URL", "").strip()
             if not rows:
                 st.info("No containers match the current filter.")
+                if t4_probe_url:
+                    _render_t4_remote_probe(t4_probe_url)
             else:
-                df = pd.DataFrame(rows)
-                st.dataframe(
-                    df.drop(columns=["full_id"], errors="ignore"),
-                    use_container_width=True,
-                    hide_index=True,
-                )
+                _render_live_stack_mermaid(rows)
+                if t4_probe_url:
+                    _render_t4_remote_probe(t4_probe_url)
+                section_header("Live container table", "Sortable columns; `full_id` stays internal for log/exec.")
+                display_df = _display_columns_for_containers(rows)
+                st.dataframe(display_df, width='stretch', hide_index=True)
                 options = [f"{r['name']} ({r['id']})" for r in rows]
                 id_by_label = {f"{r['name']} ({r['id']})": r["full_id"] for r in rows}
                 pick = st.selectbox("Container", options=options, key="deploy_debug_pick_legacy")
+                section_header("Logs", "Stdout/stderr from the selected container.")
                 tail = st.slider(
                     "Log tail (lines)",
                     min_value=50,
@@ -270,8 +532,9 @@ def _docker_fragment():
                 )
                 full_id_legacy = id_by_label[pick]
                 logs = container_logs_tail(client, full_id_legacy, tail)
-                st.markdown("**Logs**")
                 st.code(logs or "(empty)", language=None)
                 _render_docker_exec_ui(client, full_id_legacy)
                 if st.button("Refresh container list"):
                     st.rerun()
+
+    st.page_link("pages/10_Help.py", label="Help & guide (full README, including static stack Mermaid)", icon="❔")
diff --git a/evaluation_dashboard_app/pages/9_TLR_Analysis.py b/evaluation_dashboard_app/pages/9_TLR_Analysis.py
index 278a956..6f9519b 100644
--- a/evaluation_dashboard_app/pages/9_TLR_Analysis.py
+++ b/evaluation_dashboard_app/pages/9_TLR_Analysis.py
@@ -5,7 +5,11 @@
 Supports shareable URLs via query params: mode, path_a, path_b.
 """
 
+import json
+import html
+import os
 import streamlit as st
+import streamlit.components.v1 as components
 import pandas as pd
 import plotly.express as px
 import plotly.graph_objects as go
@@ -14,6 +18,7 @@
 
 from lib.tlr_eval_analyzer import TLREvaluationAnalyzer
 from lib.path_utils import get_data_root, path_display, list_tlr_result_directories
+from lib.t4_visualizer_client import DEFAULT_BASE_URL, ENV_BASE_URL
 from lib.page_chrome import (
     inject_app_page_styles,
     render_loaded_data_section,
@@ -62,7 +67,7 @@ def get_or_load_analyzer(resolved_path: str):
     """Load analyzer for path; cache in session_state by path."""
     if not resolved_path:
         return None
-    cache_key = "tlr_analyzer_cache"
+    cache_key = "tlr_analyzer_cache_v2"
     if cache_key not in st.session_state:
         st.session_state[cache_key] = {}
     cache = st.session_state[cache_key]
@@ -78,7 +83,192 @@ def get_or_load_analyzer(resolved_path: str):
     return cache[resolved_path]
 
 
-def _render_single_tabs(analyzer, tab_criteria, tab_vehicle, tab_critical, tab_details):
+def _dataframe_to_json_bytes(df: pd.DataFrame, export_kind: str) -> bytes:
+    """Serialize `df` to UTF-8 JSON (format_version, export_kind, columns, records); missing values become null."""
+    payload = {
+        "format_version": 1,
+        "export_kind": export_kind,
+        "columns": df.columns.tolist(),
+        "records": df.astype(object).where(df.notna(), None).to_dict(orient="records"),  # NaN/NaT -> None, not `NaN`
+    }
+    return json.dumps(payload, ensure_ascii=False, indent=2, default=str).encode("utf-8")  # default=str: non-JSON types
+
+
+def _build_tlr_eval_payload_by_frame(df: pd.DataFrame | None) -> dict:
+    """Build the per-frame payload for the embedded viewer; returns a `tlr_eval_clear` message when there is no data."""
+    if df is None or df.empty or "frame_index" not in df.columns:
+        return {"type": "tlr_eval_clear"}
+
+    # Helpers are defined once here rather than being re-created for every row of the loop below.
+    def _float_or_none(value):
+        try:
+            return None if pd.isna(value) else float(value)
+        except Exception:
+            return None
+
+    def _string_or_none(value):
+        try:
+            if pd.isna(value) or value == "":
+                return None
+        except Exception:
+            pass
+        return str(value)
+
+    frames: dict[str, dict] = {}
+    ordered = df.sort_values(["frame_index", "scenario"]).reset_index(drop=True)
+    for _, row in ordered.iterrows():
+        try:
+            frame_key = str(int(row.get("frame_index", 0)))
+        except (TypeError, ValueError):
+            continue
+        # Only the first row per frame (in frame_index, scenario order) is kept.
+        if frame_key in frames:
+            continue
+
+        current_time = _float_or_none(row.get("current_time"))  # converted once, reused for both time fields
+        frames[frame_key] = {
+            "scenario": str(row.get("scenario", "") or ""),
+            "t4dataset_id": str(row.get("t4dataset_id", "") or ""),
+            "frame_name": str(row.get("frame_name", "") or ""),
+            "status": str(row.get("status", "") or ""),
+            "speed_kph": _float_or_none(row.get("speed_kph")),
+            "yaw_rate_deg_s": _float_or_none(row.get("yaw_rate_deg_s")),
+            "current_time": current_time,
+            # A missing or zero timestamp yields None; otherwise the value is scaled by 1_000_000 and rounded.
+            "current_time_us": None if current_time in (None, 0.0) else int(round(current_time * 1_000_000)),
+            "traffic_light_type": str(row.get("traffic_light_type", "") or ""),
+            "evaluation_result": str(row.get("traffic_light_type", "") or ""),  # NOTE(review): same source column as above — confirm intended
+            "criteria": str(row.get("criteria", "") or ""),
+            "tp": _string_or_none(row.get("tp")),
+            "fp": _string_or_none(row.get("fp")),
+            "fn": _string_or_none(row.get("fn")),
+            "tn": _string_or_none(row.get("tn")),
+        }
+    return {"type": "tlr_eval_by_frame", "frames": frames}
+
+
+def _render_tlr_viewer_embed(viewer_url: str, payload: dict, *, iframe_id: str, height: int = 1400) -> None:
+    """Embed `/viewer/tlr` and post a frame-indexed evaluation payload into the iframe."""
+    payload_json = json.dumps(payload, ensure_ascii=True)
+    payload_hex = payload_json.encode("utf-8").hex()  # embedded as hex; the script decodes it with hexToUtf8
+    iframe_src = html.escape(viewer_url, quote=True)  # escaped for use inside the src="..." attribute
+    components.html(
+        (
+            f'<iframe id="{iframe_id}" src="{iframe_src}" '
+            f'width="100%" height="{height}" style="border:none;border-radius:8px;background:#e2e8f0" '
+            f'allowfullscreen allow="fullscreen *" '
+            f'loading="lazy" title="Traffic light viewer" referrerpolicy="no-referrer-when-downgrade"></iframe>'
+            "<script>"
+            "(()=>{"
+            f"const iframe=document.getElementById('{iframe_id}');"
+            f"const payloadHex='{payload_hex}';"
+            "const hexToUtf8=(hex)=>{"
+            "if(!hex||hex.length%2!==0)return '';"
+            "const bytes=new Uint8Array(hex.length/2);"
+            "for(let i=0;i<hex.length;i+=2){bytes[i/2]=parseInt(hex.slice(i,i+2),16)||0;}"
+            "return new TextDecoder().decode(bytes);"
+            "};"
+            "let payload={type:'tlr_eval_clear'};"  # default message if decoding/parsing below fails
+            "try{"
+            "const payloadJson=hexToUtf8(payloadHex);"
+            "payload=JSON.parse(payloadJson);"
+            "const fc=payload.frames&&typeof payload.frames==='object'?Object.keys(payload.frames).length:0;"
+            "console.info('[tlr-debug] payload prepared', {type:payload.type,frames:fc});"
+            "}catch(err){"
+            "console.error('[tlr-debug] payload parse failed', err);"
+            "}"
+            "let postCount=0;"
+            "const post=(reason)=>{"
+            "if(!iframe||!iframe.contentWindow)return;"
+            "let targetOrigin='*';"
+            "try{ targetOrigin = new URL(iframe.src, window.location.href).origin || '*'; }catch(_){ targetOrigin='*'; }"
+            "postCount+=1;"
+            "iframe.contentWindow.postMessage(payload,targetOrigin);"
+            "console.info('[tlr-debug] postMessage sent', {reason,postCount,targetOrigin,payloadType:payload.type});"
+            "};"
+            "iframe.addEventListener('load',()=>{"
+            "post('iframe-load');"
+            "let n=0;"
+            "const t=setInterval(()=>{post('retry');n+=1;if(n>12)clearInterval(t);},250);"  # bounded re-posts, 250 ms apart
+            "});"
+            "setTimeout(()=>post('initial-delay-300ms'),300);"
+            "setTimeout(()=>post('initial-delay-1200ms'),1200);"
+            "})();"
+            "</script>"
+        ),
+        height=height + 8,  # component is 8 px taller than the iframe it wraps
+        scrolling=False,
+    )
+
+
+def _render_tlr_viewer_tab(detail_sources: dict[str, pd.DataFrame | None], *, key_prefix: str) -> None:  # Viewer tab UI.
+    st.subheader("Embedded traffic light viewer")
+    st.caption("Pick a dataset from the current TLR details, then load the external `/viewer/tlr` page inline.")
+
+    if f"{key_prefix}_base_url" not in st.session_state:  # seed the input once: env value, else DEFAULT_BASE_URL
+        st.session_state[f"{key_prefix}_base_url"] = (
+            (os.environ.get(ENV_BASE_URL) or DEFAULT_BASE_URL).strip() or DEFAULT_BASE_URL
+        )
+
+    base_url = st.text_input(
+        "T4 server base URL",
+        key=f"{key_prefix}_base_url",
+        help=f"Default from env `{ENV_BASE_URL}`. The viewer URL is `/viewer/tlr?t4dataset_id=...&frame_index=...`.",
+    )
+
+    available_labels = [label for label, df in detail_sources.items() if df is not None and not df.empty]
+    if not available_labels:
+        st.info("No TLR detail rows available to drive the viewer.")
+        return
+
+    source_label = available_labels[0]
+    if len(available_labels) > 1:  # the source picker is only shown when there is a choice
+        source_label = st.radio(
+            "Use rows from",
+            available_labels,
+            horizontal=True,
+            key=f"{key_prefix}_source_label",
+        )
+
+    details_df = detail_sources[source_label].copy()
+    details_df = details_df[details_df["t4dataset_id"].fillna("").astype(str) != ""].copy()  # drop missing/empty ids
+    if details_df.empty:
+        st.info("The selected rows do not contain any `t4dataset_id` values.")
+        return
+
+    dataset_options = sorted(details_df["t4dataset_id"].astype(str).unique().tolist())
+    selected_dataset = st.selectbox(
+        "Candidate t4dataset_id",
+        dataset_options,
+        key=f"{key_prefix}_dataset_id",
+    )
+    dataset_rows = details_df[details_df["t4dataset_id"].astype(str) == selected_dataset].copy()
+
+    if dataset_rows.empty:
+        st.info("No rows match the current dataset selection.")
+        return
+
+    dataset_rows = dataset_rows.sort_values(["scenario", "frame_index"]).reset_index(drop=True)
+    selected_row = dataset_rows.iloc[0]  # first row after the sort supplies the initial frame
+    selected_frame = int(selected_row["frame_index"])
+    payload = _build_tlr_eval_payload_by_frame(dataset_rows)
+
+    viewer_url = f"{base_url.rstrip('/')}/viewer/tlr?t4dataset_id={quote(selected_dataset, safe='')}&frame_index={selected_frame}"
+    st.markdown(f"[Open `/viewer/tlr` in new tab]({viewer_url})")
+    st.caption(
+        f"Using the first available frame for this dataset: `frame_index={selected_frame}` from `{selected_row['scenario']}`."
+    )
+
+    preview_cols = ["scenario", "frame_index", "status", "traffic_light_type", "criteria"]
+    if "frame_name" in dataset_rows.columns:  # frame_name is shown only when the column exists
+        preview_cols.insert(2, "frame_name")
+    with st.expander("Matching rows", expanded=False):
+        st.dataframe(dataset_rows[preview_cols].sort_values(["scenario", "frame_index"]), width="stretch", hide_index=True)
+
+    _render_tlr_viewer_embed(viewer_url, payload, iframe_id=f"{key_prefix}_iframe", height=1600)
+
+
+def _render_single_tabs(analyzer, tab_criteria, tab_vehicle, tab_critical, tab_details, tab_tlr_viewer):
     with tab_criteria:
         st.subheader("Criteria: TP rate and total frames")
         criteria_df = analyzer.create_criteria_matrix()
@@ -136,12 +326,98 @@ def _render_single_tabs(analyzer, tab_criteria, tab_vehicle, tab_critical, tab_d
         details_df = analyzer.get_vehicle_status_details_df()
         if details_df is not None and not details_df.empty:
             st.caption("One row per frame. Use filters to narrow down by scenario, status, or traffic light type.")
-            st.dataframe(details_df, width='stretch', hide_index=True)
+            filtered_details = details_df.copy()
+            all_scenarios = sorted(filtered_details["scenario"].dropna().astype(str).unique().tolist())
+            all_statuses = sorted(filtered_details["status"].dropna().astype(str).unique().tolist())
+            all_tlr_types = sorted(filtered_details["traffic_light_type"].dropna().astype(str).unique().tolist())
+
+            with st.expander("Filters & sort", expanded=False):
+                f1, f2, f3 = st.columns(3)
+                with f1:
+                    sel_scenarios = st.multiselect(
+                        "Scenario(s)",
+                        options=all_scenarios,
+                        default=[],
+                        key="tlr_single_tab_filter_scenario",
+                        help="Leave empty to show all scenarios.",
+                    )
+                with f2:
+                    sel_statuses = st.multiselect(
+                        "Vehicle status",
+                        options=all_statuses,
+                        default=[],
+                        key="tlr_single_tab_filter_status",
+                        help="Leave empty to show all statuses.",
+                    )
+                with f3:
+                    sel_tlr_types = st.multiselect(
+                        "Traffic light type",
+                        options=all_tlr_types,
+                        default=[],
+                        key="tlr_single_tab_filter_tlr_type",
+                        help="Leave empty to show all traffic light types.",
+                    )
+                sort_by = st.selectbox(
+                    "Sort by",
+                    [
+                        "Scenario, then frame index",
+                        "Frame index only",
+                        "Vehicle status, then scenario, frame index",
+                        "Traffic light type, then scenario, frame index",
+                    ],
+                    key="tlr_single_tab_sort_by",
+                )
+
+            if sel_scenarios:
+                filtered_details = filtered_details[filtered_details["scenario"].astype(str).isin(sel_scenarios)]
+            if sel_statuses:
+                filtered_details = filtered_details[filtered_details["status"].astype(str).isin(sel_statuses)]
+            if sel_tlr_types:
+                filtered_details = filtered_details[
+                    filtered_details["traffic_light_type"].astype(str).isin(sel_tlr_types)
+                ]
+
+            if sort_by == "Scenario, then frame index":
+                filtered_details = filtered_details.sort_values(["scenario", "frame_index"]).reset_index(drop=True)
+            elif sort_by == "Frame index only":
+                filtered_details = filtered_details.sort_values(["frame_index", "scenario"]).reset_index(drop=True)
+            elif sort_by == "Vehicle status, then scenario, frame index":
+                filtered_details = filtered_details.sort_values(["status", "scenario", "frame_index"]).reset_index(drop=True)
+            else:
+                filtered_details = filtered_details.sort_values(
+                    ["traffic_light_type", "scenario", "frame_index"]
+                ).reset_index(drop=True)
+
+            st.dataframe(filtered_details, width='stretch', hide_index=True)
+            caption = f"Showing **{len(filtered_details)}** frame(s). Total before filters: {len(details_df)}."
+            if sel_scenarios or sel_statuses or sel_tlr_types:
+                caption += " Filters applied."
+            st.caption(caption)
+            dl_col_csv, dl_col_json = st.columns(2)
+            with dl_col_csv:
+                st.download_button(
+                    "Download as CSV",
+                    data=filtered_details.to_csv(index=False).encode("utf-8"),
+                    file_name="tlr_details.csv",
+                    mime="text/csv",
+                    key="tlr_dl_single_tab_csv",
+                )
+            with dl_col_json:
+                st.download_button(
+                    "Download as JSON",
+                    data=_dataframe_to_json_bytes(filtered_details, export_kind="single_dataset_details"),
+                    file_name="tlr_details.json",
+                    mime="application/json",
+                    key="tlr_dl_single_tab_json",
+                )
         else:
             st.info("No vehicle status details available.")
 
+    with tab_tlr_viewer:
+        _render_tlr_viewer_tab({"Current run": analyzer.get_vehicle_status_details_df()}, key_prefix="tlr_single_viewer")
+
 
-def _render_compare_tabs(analyzer_a, analyzer_b, label_a, label_b, tab_criteria, tab_vehicle, tab_critical, tab_details):
+def _render_compare_tabs(analyzer_a, analyzer_b, label_a, label_b, tab_criteria, tab_vehicle, tab_critical, tab_details, tab_tlr_viewer):
     with tab_criteria:
         st.subheader("Criteria: A vs B (TP rate and delta)")
         df_a = analyzer_a.create_criteria_matrix()
@@ -244,11 +520,27 @@ def _render_compare_tabs(analyzer_a, analyzer_b, label_a, label_b, tab_criteria,
         details_b = analyzer_b.get_vehicle_status_details_df()
         if details_a is not None and not details_a.empty and details_b is not None and not details_b.empty:
             merge_keys = ["scenario", "frame_index"]
-            a_sub = details_a[merge_keys + ["frame_name", "status", "traffic_light_type"]].copy()
-            a_sub = a_sub.rename(columns={"frame_name": "frame_name_a", "status": "status_a", "traffic_light_type": f"traffic_light_type ({label_a})"})
-            b_sub = details_b[merge_keys + ["frame_name", "status", "traffic_light_type"]].copy()
-            b_sub = b_sub.rename(columns={"frame_name": "frame_name_b", "status": "status_b", "traffic_light_type": f"traffic_light_type ({label_b})"})
+            a_sub = details_a[merge_keys + ["t4dataset_id", "frame_name", "status", "traffic_light_type"]].copy()
+            a_sub = a_sub.rename(
+                columns={
+                    "t4dataset_id": f"t4dataset_id ({label_a})",
+                    "frame_name": "frame_name_a",
+                    "status": "status_a",
+                    "traffic_light_type": f"traffic_light_type ({label_a})",
+                }
+            )
+            b_sub = details_b[merge_keys + ["t4dataset_id", "frame_name", "status", "traffic_light_type"]].copy()
+            b_sub = b_sub.rename(
+                columns={
+                    "t4dataset_id": f"t4dataset_id ({label_b})",
+                    "frame_name": "frame_name_b",
+                    "status": "status_b",
+                    "traffic_light_type": f"traffic_light_type ({label_b})",
+                }
+            )
             merged = a_sub.merge(b_sub, on=merge_keys, how="inner")
+            dataset_col_a = f"t4dataset_id ({label_a})"
+            dataset_col_b = f"t4dataset_id ({label_b})"
             tlr_col_a = f"traffic_light_type ({label_a})"
             tlr_col_b = f"traffic_light_type ({label_b})"
             merged["_diff"] = merged[tlr_col_a] != merged[tlr_col_b]
@@ -356,7 +648,7 @@ def _render_compare_tabs(analyzer_a, analyzer_b, label_a, label_b, tab_criteria,
                 if not to_show_diff.empty:
                     st.markdown("**Frames where traffic light type differs (A vs B)**")
                     display_df = to_show_diff[[
-                        "scenario", "frame_index",
+                        "scenario", dataset_col_a, dataset_col_b, "frame_index",
                         tlr_col_a, tlr_col_b,
                         "status_a", "status_b",
                     ]].copy()
@@ -373,7 +665,12 @@ def _highlight_diff_columns(series):
                     st.caption(caption)
                     # Download CSV
                     csv_bytes = display_df.to_csv(index=False).encode("utf-8")
-                    st.download_button("Download as CSV", data=csv_bytes, file_name="tlr_diff_frames.csv", mime="text/csv", key="tlr_dl_diff")
+                    json_bytes = _dataframe_to_json_bytes(display_df, export_kind="compare_diff_frames")
+                    dl_col_csv, dl_col_json = st.columns(2)
+                    with dl_col_csv:
+                        st.download_button("Download as CSV", data=csv_bytes, file_name="tlr_diff_frames.csv", mime="text/csv", key="tlr_dl_diff")
+                    with dl_col_json:
+                        st.download_button("Download as JSON", data=json_bytes, file_name="tlr_diff_frames.json", mime="application/json", key="tlr_dl_diff_json")
                 else:
                     st.info(
                         f"No frames with different traffic light type between {label_a} and {label_b}"
@@ -382,7 +679,7 @@ def _highlight_diff_columns(series):
             else:
                 st.markdown("**All frames (A vs B)** — rows where traffic light type differs are highlighted.")
                 display_df = to_show_merged[[
-                    "scenario", "frame_index",
+                    "scenario", dataset_col_a, dataset_col_b, "frame_index",
                     tlr_col_a, tlr_col_b,
                     "status_a", "status_b",
                 ]].copy()
@@ -402,7 +699,12 @@ def _highlight_diff_rows(df):
                     caption += " Filters applied."
                 st.caption(caption)
                 csv_bytes = display_df.to_csv(index=False).encode("utf-8")
-                st.download_button("Download as CSV", data=csv_bytes, file_name="tlr_compare_all_frames.csv", mime="text/csv", key="tlr_dl_all")
+                json_bytes = _dataframe_to_json_bytes(display_df, export_kind="compare_all_frames")
+                dl_col_csv, dl_col_json = st.columns(2)
+                with dl_col_csv:
+                    st.download_button("Download as CSV", data=csv_bytes, file_name="tlr_compare_all_frames.csv", mime="text/csv", key="tlr_dl_all")
+                with dl_col_json:
+                    st.download_button("Download as JSON", data=json_bytes, file_name="tlr_compare_all_frames.json", mime="application/json", key="tlr_dl_all_json")
         else:
             st.caption("Need details from both A and B to show traffic light type differences.")
         st.markdown("---")
@@ -425,16 +727,37 @@ def _highlight_diff_rows(df):
                 details_df = details_df[details_df["scenario"].isin(single_sel)]
             st.dataframe(details_df, width='stretch', hide_index=True)
             if not details_df.empty:
-                st.download_button(
-                    "Download as CSV",
-                    data=details_df.to_csv(index=False).encode("utf-8"),
-                    file_name=f"tlr_details_{view_which.replace(' ', '_')}.csv",
-                    mime="text/csv",
-                    key="tlr_dl_single",
-                )
+                csv_name = f"tlr_details_{view_which.replace(' ', '_')}.csv"
+                json_name = f"tlr_details_{view_which.replace(' ', '_')}.json"
+                dl_col_csv, dl_col_json = st.columns(2)
+                with dl_col_csv:
+                    st.download_button(
+                        "Download as CSV",
+                        data=details_df.to_csv(index=False).encode("utf-8"),
+                        file_name=csv_name,
+                        mime="text/csv",
+                        key="tlr_dl_single",
+                    )
+                with dl_col_json:
+                    st.download_button(
+                        "Download as JSON",
+                        data=_dataframe_to_json_bytes(details_df, export_kind="per_dataset_details"),
+                        file_name=json_name,
+                        mime="application/json",
+                        key="tlr_dl_single_json",
+                    )
         else:
             st.info("No vehicle status details available.")
 
+    with tab_tlr_viewer:
+        _render_tlr_viewer_tab(
+            {
+                label_a: analyzer_a.get_vehicle_status_details_df(),
+                label_b: analyzer_b.get_vehicle_status_details_df(),
+            },
+            key_prefix="tlr_compare_viewer",
+        )
+
 
 # ----- Sidebar: mode and TLR directory selection -----
 st.sidebar.markdown("##### TLR data")
@@ -568,10 +891,10 @@ def _highlight_diff_rows(df):
             f"Worst: **{stats['worst_criteria']}** (TP rate {stats['worst_tp_rate']:.2%})"
         )
 
-    tab_criteria, tab_vehicle, tab_critical, tab_details = st.tabs([
-        "Criteria matrix", "Vehicle status vs TLR type", "Critical & priority zones", "Vehicle status details",
+    tab_criteria, tab_vehicle, tab_critical, tab_details, tab_tlr_viewer = st.tabs([
+        "Criteria matrix", "Vehicle status vs TLR type", "Critical & priority zones", "Vehicle status details", "TLR viewer",
     ])
-    _render_single_tabs(analyzer_a, tab_criteria, tab_vehicle, tab_critical, tab_details)
+    _render_single_tabs(analyzer_a, tab_criteria, tab_vehicle, tab_critical, tab_details, tab_tlr_viewer)
     st.stop()
 
 # ========== COMPARE MODE ==========
@@ -606,7 +929,7 @@ def _highlight_diff_rows(df):
     st.metric("Total TP", f"{stats_b['total_tp']:,}")
     st.metric("Overall TP rate", f"{stats_b['overall_tp_rate']:.2%}")
 
-tab_criteria, tab_vehicle, tab_critical, tab_details = st.tabs([
-    "Criteria matrix", "Vehicle status vs TLR type", "Critical & priority zones", "Vehicle status details",
+tab_criteria, tab_vehicle, tab_critical, tab_details, tab_tlr_viewer = st.tabs([
+    "Criteria matrix", "Vehicle status vs TLR type", "Critical & priority zones", "Vehicle status details", "TLR viewer",
 ])
-_render_compare_tabs(analyzer_a, analyzer_b, label_a, label_b, tab_criteria, tab_vehicle, tab_critical, tab_details)
+_render_compare_tabs(analyzer_a, analyzer_b, label_a, label_b, tab_criteria, tab_vehicle, tab_critical, tab_details, tab_tlr_viewer)
diff --git a/evaluation_dashboard_app/requirements-docker.txt b/evaluation_dashboard_app/requirements-docker.txt
old mode 100644
new mode 100755
index aeb7130..7486fc0
--- a/evaluation_dashboard_app/requirements-docker.txt
+++ b/evaluation_dashboard_app/requirements-docker.txt
@@ -4,12 +4,15 @@
 streamlit>=1.30.0
 pandas>=2.0.0
 plotly>=5.18.0
+kaleido>=0.2.1
 duckdb>=0.9.0
 numpy>=1.24.0
 matplotlib>=3.7.0
 shapely>=2.0.0
+polars>=1.0.0
 requests>=2.31.0
 PyYAML>=6.0
+reportlab>=4.0.0
 rq>=1.15.0
 psycopg2-binary>=2.9.0
-docker>=7.0.0,<8
\ No newline at end of file
+docker>=7.0.0,<8
diff --git a/evaluation_dashboard_app/requirements.txt b/evaluation_dashboard_app/requirements.txt
old mode 100644
new mode 100755
index 6bbc2f5..74726e8
--- a/evaluation_dashboard_app/requirements.txt
+++ b/evaluation_dashboard_app/requirements.txt
@@ -1,12 +1,14 @@
 streamlit>=1.30.0
 pandas>=2.0.0
 plotly>=5.18.0
+kaleido>=0.2.1
 duckdb>=0.9.0
 numpy>=1.24.0
 matplotlib>=3.7.0
 shapely>=2.0.0
 requests>=2.31.0
 PyYAML>=6.0
+reportlab>=4.0.0
 # Production: task queue and metadata
 rq>=1.15.0
 psycopg2-binary>=2.9.0
diff --git a/evaluation_dashboard_app/scripts/import_catalog_analyzer_releases.py b/evaluation_dashboard_app/scripts/import_catalog_analyzer_releases.py
new file mode 100644
index 0000000..11d26e9
--- /dev/null
+++ b/evaluation_dashboard_app/scripts/import_catalog_analyzer_releases.py
@@ -0,0 +1,540 @@
+#!/usr/bin/env python3
+"""Import perception_catalog_analyzer release exports into dashboard data.
+
+This script converts release data generated directly by
+perception_catalog_analyzer into the dashboard's release/trend structure.
+
+Expected source layout:
+
+    perception_catalog_analyzer_output/
+      export/
+        <job_id>/
+          metadata.yaml
+          current.parquet
+          future.parquet
+          devops.parquet
+          detection.yaml
+      pdf/
+        <group_name>/
+          <topic_name>/
+            <job_id>/
+              metadata.yaml
+              summary.json
+            specsheet/
+              specsheet.pdf
+
+Here <group_name> is usually a joined list of evaluator job IDs, for example:
+
+    <full_job_id>_<usecase_job_id>_<devops_job_id>
+
+Generated dashboard layout:
+
+    data/
+      release_spec_<group_name>/
+        metadata.yaml
+        performance/
+          metadata.yaml
+          resources/summary.json
+          current.parquet
+          future.parquet
+          detection.yaml
+        usecase/
+          metadata.yaml
+          resources/summary.json
+        devops/
+          metadata.yaml
+          resources/summary.json
+          current.parquet
+        specsheet/
+          specsheet.pdf
+          <topic_name>/specsheet.pdf
+
+      trend_release_<group_name>/
+        <topic_name>/
+          <job_id>/
+            metadata.yaml
+            summary.json
+          specsheet/specsheet.pdf
+
+    static/
+      release_specs/
+        <group_name>/
+          <topic_name>.pdf
+
+By default, .parquet/.html/.png artifacts are symlinked to avoid duplicating very
+large analyzer output; PDFs and other files are always copied. Use
+--copy-large-artifacts when the analyzer output may be removed or unmounted.
+
+Common usage:
+
+    cd /path/to/evaluation_dashboard_app
+    python scripts/import_catalog_analyzer_releases.py \\
+      --source /path/to/perception_catalog_analyzer_output \\
+      --data-root /path/to/dashboard/data \\
+      --force
+
+Production/server usage when source data should not remain mounted:
+
+    python scripts/import_catalog_analyzer_releases.py \\
+      --source /mnt/catalog_analyzer_output \\
+      --data-root /srv/eval_dashboard/data \\
+      --copy-large-artifacts \\
+      --force
+
+After import, make sure the app serves static PDFs from static/. In this app's
+Docker setup, static/ is mounted into /app/static and Streamlit static serving
+is enabled.
+
+If the app directory is read-only on a server, either:
+
+    - pass --static-root /writable/path/release_specs and mount that path as
+      /app/static/release_specs, or
+    - pass --skip-static-publish to import data only. PDF files are still copied
+      into data/release_spec_*/specsheet and data/trend_release_*/specsheet.
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+import os
+import shutil
+import sys
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any
+
+import yaml
+
+
+ANALYZER_ROOT = Path("/home/leigu/Downloads/perception_catalog_analyzer_output")
+MAIN_TOPIC = "perception.object_recognition.objects"
+ROLE_DIR_BY_SUMMARY_ROLE = {
+    "full": "performance",
+    "usecase": "usecase",
+    "devops": "devops",
+    "performance_blocks": "performance",
+    "unknown": "unknown",
+}
+DEFAULT_PROJECT_ID = "x2_dev"
+SUMMARY_FULL_HEADER = "全数データセット評価"
+SUMMARY_USECASE_HEADER = "ユースケース評価"
+LARGE_SUFFIXES = {".parquet", ".html", ".png"}
+
+
+@dataclass(frozen=True)
+class ImportStats:
+    """Immutable counters for one import run; ``add`` returns an incremented copy, never mutates."""
+    releases: int = 0
+    trend_jobs: int = 0
+    role_runs: int = 0
+    linked: int = 0
+    copied: int = 0
+    skipped: int = 0
+
+    def add(self, **kwargs: int) -> "ImportStats":
+        totals = dict(self.__dict__)
+        totals.update({name: int(totals.get(name, 0)) + delta for name, delta in kwargs.items()})
+        return ImportStats(**totals)
+
+
+def _data_root() -> Path:
+    """Return the resolved data root from EVAL_DASHBOARD_DATA_ROOT (default ``data``), creating it."""
+    configured = Path(os.environ.get("EVAL_DASHBOARD_DATA_ROOT", "data"))
+    # Relative values are anchored at the current working directory.
+    location = configured if configured.is_absolute() else Path.cwd() / configured
+    location.mkdir(parents=True, exist_ok=True)
+    return location.resolve()
+
+
+def _safe_path_part(value: str, fallback: str) -> str:
+    """Make ``value`` usable as one path component; return ``fallback`` if nothing survives."""
+    import re  # only needed by this helper
+    cleaned = re.compile(r"[^\w.\-]+").sub("_", str(value or ""))
+    return cleaned.strip("._") or fallback
+
+
+def _load_json(path: Path) -> dict[str, Any]:
+    """Parse the UTF-8 JSON file at ``path``; a non-object top-level value yields ``{}``."""
+    parsed = json.loads(path.read_text(encoding="utf-8"))
+    return parsed if isinstance(parsed, dict) else {}
+
+
+def _load_yaml(path: Path) -> dict[str, Any]:
+    """Read a YAML mapping from ``path``; a missing file or non-mapping document gives ``{}``."""
+    if not path.exists():
+        return {}
+    document = yaml.safe_load(path.read_text(encoding="utf-8")) or {}
+    return document if isinstance(document, dict) else {}
+
+
+def _classify_summary(summary: dict[str, Any]) -> str:
+    """Infer the role of a summary.json payload.
+
+    ``blocks`` list -> "full"/"usecase" by header, else "performance_blocks";
+    otherwise "devops" if non-empty, "unknown" if empty."""
+    blocks = summary.get("blocks")
+    if not isinstance(blocks, list):
+        return "devops" if summary else "unknown"
+    seen = {str(entry.get("header") or "") for entry in blocks if isinstance(entry, dict)}
+    if SUMMARY_FULL_HEADER in seen:
+        return "full"
+    return "usecase" if SUMMARY_USECASE_HEADER in seen else "performance_blocks"
+
+
+def _write_yaml(path: Path, payload: dict[str, Any]) -> None:
+    """Dump ``payload`` as UTF-8 YAML at ``path`` (key order kept), creating parent directories."""
+    path.parent.mkdir(parents=True, exist_ok=True)
+    path.write_text(yaml.safe_dump(payload, allow_unicode=True, sort_keys=False), encoding="utf-8")
+
+
+def _copy_or_link(src: Path, dst: Path, *, copy_large_artifacts: bool, force: bool) -> str:
+    """Place ``src`` at ``dst``; return "linked", "copied", or "skipped" (dst exists, no force)."""
+    dst.parent.mkdir(parents=True, exist_ok=True)
+    if dst.is_symlink() or dst.exists():
+        if not force:
+            return "skipped"
+        if dst.is_symlink() or not dst.is_dir():
+            dst.unlink()
+        else:
+            shutil.rmtree(dst)
+    # Only LARGE_SUFFIXES files are symlinked (to the resolved source), and only when not copying.
+    if copy_large_artifacts or src.suffix.lower() not in LARGE_SUFFIXES:
+        shutil.copy2(src, dst)
+        return "copied"
+    os.symlink(src.resolve(), dst)
+    return "linked"
+
+
+def _publish_static_pdf(pdf_path: Path, static_pdf_path: Path, *, force: bool) -> str:
+    """Hard-link (else copy) the PDF to ``static_pdf_path``; "skipped" if present without force."""
+    static_pdf_path.parent.mkdir(parents=True, exist_ok=True)
+    if static_pdf_path.is_symlink() or static_pdf_path.exists():
+        if not force:
+            return "skipped"
+        static_pdf_path.unlink()
+    try:
+        os.link(pdf_path.resolve(), static_pdf_path)
+    except OSError:  # hard link not possible here: fall back to a regular copy
+        shutil.copy2(pdf_path.resolve(), static_pdf_path)
+    return "copied"
+
+
+def _artifact_stat(stats: ImportStats, action: str) -> ImportStats:
+    """Bump the counter named by ``action`` ("linked", "copied" or "skipped").
+
+    Any other action string leaves ``stats`` unchanged.
+    """
+    if action not in ("linked", "copied", "skipped"):
+        return stats
+    return stats.add(**{action: 1})
+
+
+def _merge_metadata(
+    base: dict[str, Any],
+    *,
+    group_name: str,
+    topic_name: str,
+    job_id: str,
+    role: str,
+    imported_from: Path | str | None = None,
+) -> dict[str, Any]:
+    """Build dashboard metadata from analyzer metadata ``base`` plus import context.
+
+    Keeps selected non-empty top-level keys, adds catalog fields from
+    ``evaluator_info.catalog`` and git fields from ``evaluator_info.event.source``,
+    then stamps release group, topic, job id, project id and role.
+    ``imported_from`` is the analyzer root that was actually read; when omitted
+    it falls back to the module default ANALYZER_ROOT (the previous behaviour).
+    """
+    evaluator_info = base.get("evaluator_info") if isinstance(base.get("evaluator_info"), dict) else {}
+    catalog = evaluator_info.get("catalog") if isinstance(evaluator_info.get("catalog"), dict) else {}
+    source = evaluator_info.get("event", {}).get("source", {}) if isinstance(evaluator_info.get("event"), dict) else {}
+    project_id = str(base.get("project_id") or DEFAULT_PROJECT_ID).strip()
+    copied_keys = ("tags", "pilot_auto_version", "version_abbr", "data_count", "description", "date")
+    merged = {key: base.get(key) for key in copied_keys if base.get(key) not in (None, "")}
+    if catalog:
+        merged["catalog_display_name"] = catalog.get("display_name")
+        merged["catalog_id"] = catalog.get("id")
+        merged["catalog_version_id"] = catalog.get("version_id")
+    if isinstance(source, dict):
+        for key in ("git_commit_url", "git_ref", "git_commit_date"):
+            if source.get(key):
+                merged[key] = source.get(key)
+    merged["release_group"] = group_name
+    merged["topic_name"] = topic_name
+    merged["job_id"] = job_id
+    merged["project_id"] = project_id
+    merged["role"] = role
+    # Record the root that was really imported, not the hard-coded default path.
+    merged["imported_from"] = str(ANALYZER_ROOT if imported_from is None else imported_from)
+    return merged
+
+
+def _copy_export_job(
+    export_root: Path,
+    job_id: str,
+    target_dir: Path,
+    *,
+    group_name: str,
+    topic_name: str,
+    role: str,
+    copy_large_artifacts: bool,
+    force: bool,
+    stats: ImportStats,
+    imported_from: Path | str | None = None,
+) -> ImportStats:
+    """Import ``export_root/<job_id>`` (metadata plus parquet/detection files) into ``target_dir``.
+
+    Returns ``stats`` unchanged when that export directory is missing, otherwise the
+    updated counters. ``imported_from`` is forwarded to ``_merge_metadata``.
+    """
+    source_dir = export_root / job_id
+    if not source_dir.is_dir():
+        return stats
+    metadata = _merge_metadata(
+        _load_yaml(source_dir / "metadata.yaml"),
+        group_name=group_name,
+        topic_name=topic_name,
+        job_id=job_id,
+        role=role,
+        imported_from=imported_from,
+    )
+    _write_yaml(target_dir / "metadata.yaml", metadata)
+    stats = stats.add(copied=1)
+
+    for file_name in ("current.parquet", "future.parquet", "devops.parquet", "detection.yaml"):
+        src = source_dir / file_name
+        if src.exists():
+            action = _copy_or_link(src, target_dir / file_name, copy_large_artifacts=copy_large_artifacts, force=force)
+            stats = _artifact_stat(stats, action)
+    return stats
+
+
+def _copy_summary_job(
+    job_dir: Path,
+    target_dir: Path,
+    *,
+    group_name: str,
+    topic_name: str,
+    job_id: str,
+    role: str,
+    force: bool,
+    stats: ImportStats,
+    imported_from: Path | str | None = None,
+) -> ImportStats:
+    """Write merged metadata to ``target_dir/resources`` and copy ``summary.json`` twice.
+
+    The summary lands in both ``resources/summary.json`` and ``target_dir/summary.json`` and is
+    always copied, never symlinked. ``imported_from`` is forwarded to ``_merge_metadata``.
+    """
+    resources = target_dir / "resources"
+    resources.mkdir(parents=True, exist_ok=True)
+    metadata = _merge_metadata(
+        _load_yaml(job_dir / "metadata.yaml"),
+        group_name=group_name,
+        topic_name=topic_name,
+        job_id=job_id,
+        role=role,
+        imported_from=imported_from,
+    )
+    _write_yaml(resources / "metadata.yaml", metadata)
+    stats = stats.add(copied=1)
+
+    summary_src = job_dir / "summary.json"
+    if summary_src.exists():
+        for dst in (resources / "summary.json", target_dir / "summary.json"):
+            action = _copy_or_link(summary_src, dst, copy_large_artifacts=True, force=force)
+            stats = _artifact_stat(stats, action)
+    return stats
+
+
+def import_releases(
+    analyzer_root: Path,
+    data_root: Path,
+    *,
+    static_root: Path | None,
+    copy_large_artifacts: bool,
+    force: bool,
+) -> ImportStats:
+    """Convert every release group under ``analyzer_root/pdf`` into dashboard data.
+
+    For each group this fills ``release_spec_<group>`` and ``trend_release_<group>`` under
+    ``data_root`` and, when ``static_root`` is given and writable, publishes specsheet PDFs there.
+    Raises FileNotFoundError if ``export/`` or ``pdf/`` is missing. Returns the action counters.
+    """
+    export_root = analyzer_root / "export"
+    pdf_root = analyzer_root / "pdf"
+    stats = ImportStats()
+
+    if not export_root.is_dir() or not pdf_root.is_dir():
+        raise FileNotFoundError(f"Expected export/ and pdf/ under {analyzer_root}")
+
+    if static_root is not None:
+        try:
+            static_root.mkdir(parents=True, exist_ok=True)
+        except PermissionError as exc:
+            print(
+                f"Warning: cannot write static PDF directory {static_root}: {exc}. "
+                "Continuing without static PDF publishing. Use --static-root with a writable path, "
+                "fix directory ownership, or pass --skip-static-publish.",
+                file=sys.stderr,
+            )
+            static_root = None
+
+    for pdf_group_dir in sorted(path for path in pdf_root.iterdir() if path.is_dir()):
+        group_name = pdf_group_dir.name
+        group_safe = _safe_path_part(group_name, "release")
+        release_dir = data_root / f"release_spec_{group_safe}"
+        trend_dir = data_root / f"trend_release_{group_safe}"
+        release_dir.mkdir(parents=True, exist_ok=True)
+        stats = stats.add(releases=1)
+
+        release_metadata_written = False
+        for topic_dir in sorted(path for path in pdf_group_dir.iterdir() if path.is_dir()):
+            topic_name = topic_dir.name
+            topic_safe = _safe_path_part(topic_name, "topic")
+            trend_topic_dir = trend_dir / topic_name
+            trend_topic_dir.mkdir(parents=True, exist_ok=True)
+
+            specsheet_pdf = topic_dir / "specsheet" / "specsheet.pdf"
+            if specsheet_pdf.exists():
+                pdf_targets = [
+                    release_dir / "specsheet" / topic_safe / "specsheet.pdf",
+                    trend_topic_dir / "specsheet" / "specsheet.pdf",
+                ]
+                if topic_name == MAIN_TOPIC:
+                    pdf_targets.append(release_dir / "specsheet" / "specsheet.pdf")
+                for pdf_target in pdf_targets:
+                    action = _copy_or_link(specsheet_pdf, pdf_target, copy_large_artifacts=copy_large_artifacts, force=force)
+                    stats = _artifact_stat(stats, action)
+                if static_root is not None:
+                    static_pdf_path = static_root / group_safe / f"{topic_safe}.pdf"
+                    action = _publish_static_pdf(specsheet_pdf, static_pdf_path, force=force)
+                    stats = _artifact_stat(stats, action)
+
+            for job_dir in sorted(path for path in topic_dir.iterdir() if path.is_dir()):
+                summary_path = job_dir / "summary.json"
+                if job_dir.name in {"trend", "specsheet"} or not summary_path.exists():
+                    continue
+                job_id = job_dir.name
+                role = _classify_summary(_load_json(summary_path))
+                metadata_path = job_dir / "metadata.yaml"
+                job_metadata = _merge_metadata(
+                    _load_yaml(metadata_path),
+                    group_name=group_name,
+                    topic_name=topic_name,
+                    job_id=job_id,
+                    role=role,
+                    imported_from=analyzer_root,
+                )
+
+                trend_job_dir = trend_topic_dir / job_id
+                trend_job_dir.mkdir(parents=True, exist_ok=True)
+                action = _copy_or_link(summary_path, trend_job_dir / "summary.json", copy_large_artifacts=True, force=force)
+                stats = _artifact_stat(stats, action)
+                if metadata_path.exists():
+                    _write_yaml(trend_job_dir / "metadata.yaml", job_metadata)
+                    stats = stats.add(copied=1)
+                stats = stats.add(trend_jobs=1)
+
+                if topic_name != MAIN_TOPIC:
+                    continue
+                role_dir = release_dir / ROLE_DIR_BY_SUMMARY_ROLE.get(role, role)
+                stats = _copy_export_job(
+                    export_root,
+                    job_id,
+                    role_dir,
+                    group_name=group_name,
+                    topic_name=topic_name,
+                    role=role,
+                    copy_large_artifacts=copy_large_artifacts,
+                    force=force,
+                    stats=stats,
+                    imported_from=analyzer_root,
+                )
+                stats = _copy_summary_job(
+                    job_dir,
+                    role_dir,
+                    group_name=group_name,
+                    topic_name=topic_name,
+                    job_id=job_id,
+                    role=role,
+                    force=force,
+                    stats=stats,
+                    imported_from=analyzer_root,
+                )
+                stats = stats.add(role_runs=1)
+
+                if not release_metadata_written and role in {"full", "performance_blocks"}:
+                    _write_yaml(release_dir / "metadata.yaml", job_metadata)
+                    release_metadata_written = True
+                    stats = stats.add(copied=1)
+
+        if not release_metadata_written:
+            _write_yaml(release_dir / "metadata.yaml", {"release_group": group_name, "imported_from": str(analyzer_root)})
+            stats = stats.add(copied=1)
+
+    return stats
+
+
+def main() -> int:
+    """Parse command-line options, run the import, print the counters as JSON, and return 0."""
+    parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
+    parser.add_argument(
+        "--source",
+        type=Path,
+        default=ANALYZER_ROOT,
+        help="Analyzer output root containing export/ and pdf/. Default: %(default)s",
+    )
+    parser.add_argument(
+        "--data-root",
+        type=Path,
+        default=None,
+        help="Dashboard data root. Defaults to EVAL_DASHBOARD_DATA_ROOT or ./data.",
+    )
+    parser.add_argument(
+        "--copy-large-artifacts",
+        action="store_true",
+        help=(
+            "Copy .parquet/.html/.png artifacts instead of symlinking them (PDFs are always copied). "
+            "Use this on servers when the original analyzer output will not stay mounted."
+        ),
+    )
+    parser.add_argument(
+        "--static-root",
+        type=Path,
+        default=None,
+        help=(
+            "Directory for static PDF copies. Defaults to ./static/release_specs. "
+            "Use a writable path on servers and mount it as /app/static/release_specs."
+        ),
+    )
+    parser.add_argument(
+        "--skip-static-publish",
+        action="store_true",
+        help="Do not write static/release_specs PDF copies. Data/specsheet PDFs are still imported.",
+    )
+    parser.add_argument("--force", action="store_true", help="Replace existing imported files and links.")
+    args = parser.parse_args()
+
+    data_root = args.data_root.resolve() if args.data_root is not None else _data_root()
+    static_root = None
+    if not args.skip_static_publish:
+        # Default publish location is ./static/release_specs relative to the working directory.
+        chosen = args.static_root if args.static_root is not None else Path.cwd() / "static" / "release_specs"
+        static_root = chosen.resolve()
+    stats = import_releases(
+        args.source.resolve(),
+        data_root,
+        static_root=static_root,
+        copy_large_artifacts=args.copy_large_artifacts,
+        force=args.force,
+    )
+    print(json.dumps(stats.__dict__, indent=2, ensure_ascii=False))
+    print(f"Imported analyzer releases into {data_root}")
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/evaluation_dashboard_app/slides/evaluation_dashboard_intro_ja.md b/evaluation_dashboard_app/slides/evaluation_dashboard_intro_ja.md
new file mode 100644
index 0000000..a9e3ea5
--- /dev/null
+++ b/evaluation_dashboard_app/slides/evaluation_dashboard_intro_ja.md
@@ -0,0 +1,202 @@
+---
+marp: true
+theme: default
+paginate: true
+size: 16:9
+---
+
+# 評価業務を回す統合ダッシュボード
+
+### Perception Evaluation Dashboard 紹介
+
+- 発表者: （お名前）
+- 日付: 2026-04-22
+
+---
+
+## 今日お話しすること
+
+1. 背景: 何が課題だったか
+2. システム全体像と基本導線
+3. 主要機能の紹介
+4. 技術構成（運用アーキテクチャ）
+5. 価値と今後の展開
+
+---
+
+## 背景: 何が困っていたのか
+
+- 評価結果の取得、整形、可視化、比較、共有が分断
+- ツール間移動や手作業が多く、時間がかかる
+- 比較条件が揃わず、議論が噛み合わない
+- 属人化しやすく、再現性が下がる
+
+---
+
+## このシステムの狙い
+
+**評価業務の一連の流れを一つの場所で回すこと**
+
+- 取得
+- 整形（Summary / Score / parquet）
+- 分析（単体・比較）
+- 共有
+- データ管理
+
+> 「見るための道具」ではなく「評価業務の基盤」
+
+---
+
+## システム全体像
+
+- Download: 結果・シナリオの取得
+- Eval Results: CSV/評価データ生成
+- Overview: Run/比較条件の統一
+- 各分析ページ: 観点別の深掘り
+- Data Management: 共有・整理・運用
+
+---
+
+## 典型的なユーザー導線（3ステップ）
+
+1. Downloadで対象結果を取得
+2. Summary.csv / Score.csv を生成
+3. OverviewでRun選択 → 分析ページへ
+
+**効果:** 作業切替が減り、初心者でも入りやすい
+
+---
+
+## Overviewの役割（ハブ）
+
+- Single / Compare モード切替
+- Baseline / Candidate の比較前提を統一
+- Perception Label / Product Label で共通フィルタ
+- 共有URLで表示状態を再現
+
+**ポイント:** 前提を揃えて議論のズレを防ぐ
+
+---
+
+## Downloadの価値
+
+- ダウンロード前後の作業を一体化
+- 取得だけでなく、後続分析で使える形まで整備
+- 重い処理はタスク化し、進捗を可視化
+
+**運用効果:** 属人化の低減、日常業務の安定化
+
+---
+
+## TP Summary / Criteria Based Score
+
+### TP Summary
+- Summary.csvベースの全体傾向把握
+- 平均だけでなく分布や外れ値を確認
+
+### Criteria Based Score
+- Score.csvベースの基準別評価
+- しきい値（ゲート）を使った合否判断に有効
+
+---
+
+## Detection Stats
+
+- parquet + DuckDBで詳細分析
+- TP/FPなどの状態別・距離ビン別比較
+- 全体値では見えない偏りを発見
+
+**使いどころ:** 「差がある」から「どこで差がある」へ
+
+---
+
+## Bounding Box Viewer
+
+- BEV上でバウンディングボックスを可視化
+- topic / label / visibility で絞り込み
+- Compareで挙動差を視覚的に確認
+
+**意義:** 数値の裏にある実体を理解する
+
+---
+
+## TLR Analysis
+
+- 信号認識評価に特化
+- criteriaマトリクス、車両状態×信号種別、zone分析
+- 比較時は差分ヒートマップで把握
+
+**強み:** ドメイン特化で弱点を構造的に把握
+
+---
+
+## Prediction Evaluation
+
+- minADE / minFDEなどを距離・方向・ラベルで分解
+- リング表示などで偏りを直感的に把握
+
+**意義:** 全体平均だけでなく改善対象を特定できる
+
+---
+
+## Data Management
+
+- Run一覧（サイズ/更新日時/成果物有無）を可視化
+- ZIP化、共有リンク生成、不要Run削除
+
+**実運用で重要:** 分析品質を落とさない整理整頓
+
+---
+
+## Help / Debugページ
+
+- Help: アプリ内で使い方を参照
+- Parquet Debug: スキーマやデータ切り分け
+- Deployment Debug: Postgres / Redis / Worker状態確認
+
+**設計思想:** 使う人だけでなく支える人の導線も用意
+
+---
+
+## 技術構成（フロント〜バック）
+
+- フロント: Streamlit
+- 非同期: Redis + Worker
+- 状態管理: Postgres
+- 配置: Nginx配下に複数Streamlit + Worker群
+
+**狙い:** UIを軽く保ち、重い処理はバックグラウンド化
+
+---
+
+## このアーキテクチャのメリット
+
+- 重い処理で画面が固まりにくい
+- 複数人運用でも影響を分離しやすい
+- 障害切り分けがしやすい
+- スケールしやすい（UIと処理を分離）
+
+---
+
+## まとめ
+
+このシステムは、
+
+- 取得
+- 整形
+- 分析
+- 共有
+- 運用
+
+を一体化した**評価業務の基盤**です。
+
+> **評価を見える化するだけでなく、回せるようにするシステム**
+
+---
+
+## Q&A
+
+- ご質問はこのセクションへご記入ください
+- 時間内に回答しきれない場合は後ほどフォローします
+
+ありがとうございました。
diff --git a/evaluation_dashboard_app/tests/conftest.py b/evaluation_dashboard_app/tests/conftest.py
new file mode 100644
index 0000000..e24af09
--- /dev/null
+++ b/evaluation_dashboard_app/tests/conftest.py
@@ -0,0 +1,8 @@
+"""Pytest configuration for evaluation_dashboard_app tests."""
+
+
+def pytest_configure(config):
+    config.addinivalue_line(
+        "markers",
+        "integration: tests that require a live service or network (opt-in)",
+    )
diff --git a/evaluation_dashboard_app/tests/test_t4_visualizer_client.py b/evaluation_dashboard_app/tests/test_t4_visualizer_client.py
new file mode 100644
index 0000000..26aabc6
--- /dev/null
+++ b/evaluation_dashboard_app/tests/test_t4_visualizer_client.py
@@ -0,0 +1,200 @@
+"""Tests for lib/t4_visualizer_client.py.
+
+Unit tests use mocks (no network). Optional integration tests call a live server when
+``T4_VISUALIZER_BASE_URL`` points at a reachable instance (e.g. ``t4-server``); they
+skip if the server is down.
+"""
+
+from __future__ import annotations
+
+import base64
+import os
+from unittest.mock import MagicMock
+
+import pytest
+
+from lib.t4_visualizer_client import (
+    ENV_BASE_URL,
+    RenderRequest,
+    T4VisualizerClient,
+    T4VisualizerError,
+    TargetObjectIn,
+    target_object_from_gt_row,
+)
+
+
+# Minimal valid 1x1 PNG (transparent pixel)
+_TINY_PNG_BYTES = (
+    b"\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x01\x00\x00\x00\x01"
+    b"\x08\x06\x00\x00\x00\x1f\x15\xc4\x89\x00\x00\x00\nIDATx\x9cc\x00\x01"
+    b"\x00\x00\x05\x00\x01\r\n-\xdb\x00\x00\x00\x00IEND\xaeB`\x82"
+)
+_TINY_PNG_B64 = base64.b64encode(_TINY_PNG_BYTES).decode("ascii")
+
+
+def _ok_response(json_data):
+    r = MagicMock()
+    r.ok = True
+    r.status_code = 200
+    r.text = ""
+    r.json.return_value = json_data
+    return r
+
+
+def _err_response(status_code: int, text: str = "not found"):
+    r = MagicMock()
+    r.ok = False
+    r.status_code = status_code
+    r.text = text
+    return r
+
+
+def test_health_success():
+    session = MagicMock()
+    session.get.return_value = _ok_response({"status": "ok"})
+    c = T4VisualizerClient(base_url="http://test:9999", session=session)
+    assert c.health() == {"status": "ok"}
+    session.get.assert_called_once()
+    assert "health" in session.get.call_args[0][0]
+
+
+def test_list_datasets_success():
+    session = MagicMock()
+    session.get.return_value = _ok_response(
+        {"data_dir": "/data", "datasets": ["ds_a", "ds_b"]}
+    )
+    c = T4VisualizerClient(base_url="http://test", session=session)
+    d = c.list_datasets()
+    assert d["datasets"] == ["ds_a", "ds_b"]
+    assert d["data_dir"] == "/data"
+
+
+def test_list_dataset_scenarios_success():
+    session = MagicMock()
+    session.get.return_value = _ok_response(
+        {
+            "t4dataset_id": "ds1",
+            "scenarios": [
+                {
+                    "name": "scene-a",
+                    "token": "tok",
+                    "description": "",
+                    "nbr_samples": 42,
+                }
+            ],
+            "version": None,
+        }
+    )
+    c = T4VisualizerClient(base_url="http://test", session=session)
+    out = c.list_dataset_scenarios("ds1")
+    assert out["t4dataset_id"] == "ds1"
+    assert len(out["scenarios"]) == 1
+    assert out["scenarios"][0]["name"] == "scene-a"
+    assert out["scenarios"][0]["nbr_samples"] == 42
+    session.get.assert_called_once()
+    call_url = session.get.call_args[0][0]
+    assert "ds1" in call_url and "scenarios" in call_url
+
+
+def test_render_success_decode():
+    session = MagicMock()
+    session.post.return_value = _ok_response(
+        {
+            "sample_token": "tok1",
+            "timestamp_us": 1234567890000000,
+            "images": [{"label": "CAM_FRONT", "png_base64": _TINY_PNG_B64}],
+        }
+    )
+    c = T4VisualizerClient(base_url="http://test", session=session)
+    req = RenderRequest(
+        t4dataset_id="ds1",
+        scenario_name="scene-1",
+        frame_index=0,
+        target_objects=[TargetObjectIn(uuid="u1", x=1.0, y=2.0, z=0.5, label="car")],
+    )
+    out = c.render(req)
+    assert out.sample_token == "tok1"
+    assert out.timestamp_us == 1234567890000000
+    assert len(out.images) == 1
+    raw = out.decode_png("CAM_FRONT")
+    assert raw == _TINY_PNG_BYTES
+    all_pairs = out.decode_all_images()
+    assert all_pairs == [("CAM_FRONT", _TINY_PNG_BYTES)]
+
+
+def test_render_http_error():
+    session = MagicMock()
+    session.post.return_value = _err_response(404, "Dataset 'x' not found")
+    c = T4VisualizerClient(base_url="http://test", session=session)
+    req = RenderRequest(t4dataset_id="x", scenario_name="s", frame_index=0)
+    with pytest.raises(T4VisualizerError) as ei:
+        c.render(req)
+    assert ei.value.status_code == 404
+    assert "404" in str(ei.value) or "not found" in ei.value.response_text.lower()
+
+
+def test_render_invalid_json_body():
+    session = MagicMock()
+    r = MagicMock()
+    r.ok = True
+    r.status_code = 200
+    r.json.side_effect = ValueError("bad json")
+    session.post.return_value = r
+    c = T4VisualizerClient(base_url="http://test", session=session)
+    with pytest.raises(T4VisualizerError, match="Invalid JSON"):
+        c.render(RenderRequest(t4dataset_id="a", scenario_name="b", frame_index=0))
+
+
+def test_target_object_from_gt_row_full():
+    row = {
+        "uuid": "abc-123",
+        "x": 10.5,
+        "y": -2.0,
+        "z": 0.1,
+        "label": "pedestrian",
+        "width": 0.5,
+        "length": 0.6,
+        "height": 1.7,
+        "yaw": 0.25,
+    }
+    d = target_object_from_gt_row(row)
+    assert d["uuid"] == "abc-123"
+    assert d["x"] == 10.5
+    assert d["y"] == -2.0
+    assert d["z"] == 0.1
+    assert d["label"] == "pedestrian"
+    assert d["width"] == 0.5
+    assert d["length"] == 0.6
+    assert d["height"] == 1.7
+    assert d["yaw"] == 0.25
+
+
+def test_target_object_from_gt_row_gt_uuid_partial():
+    row = {"gt_uuid": "g1", "x": 1.0, "y": 2.0, "label": "car"}
+    d = target_object_from_gt_row(row)
+    assert d["uuid"] == "g1"
+    assert d["z"] == 0.0
+    assert d["width"] == 0.0
+    assert d["length"] == 0.0
+    assert d["height"] == 0.0
+    assert d["yaw"] == 0.0
+
+
+def test_target_object_from_gt_row_uuid_precedence():
+    row = {"uuid": "u", "gt_uuid": "g", "x": 0, "y": 0}
+    d = target_object_from_gt_row(row)
+    assert d["uuid"] == "u"
+
+
+@pytest.mark.integration
+def test_live_health_if_configured():
+    """Skips unless T4_VISUALIZER_BASE_URL is set and server responds."""
+    base = os.environ.get(ENV_BASE_URL)
+    if not base:
+        pytest.skip(f"Set {ENV_BASE_URL} to run integration test against a live server")
+    client = T4VisualizerClient(base_url=base, timeout=5.0)
+    try:
+        h = client.health()
+    except (T4VisualizerError, OSError) as e:
+        pytest.skip(f"Server not reachable: {e}")
+    assert h.get("status") == "ok"
diff --git a/evaluation_dashboard_app/worker/run_worker.py b/evaluation_dashboard_app/worker/run_worker.py
index 4a4b8d2..f98a2fd 100644
--- a/evaluation_dashboard_app/worker/run_worker.py
+++ b/evaluation_dashboard_app/worker/run_worker.py
@@ -6,11 +6,13 @@
 
 import os
 import sys
+import faulthandler
 
 _APP_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 if _APP_ROOT not in sys.path:
     sys.path.insert(0, _APP_ROOT)
 os.chdir(_APP_ROOT)
+faulthandler.enable(all_threads=True)
 
 def main():
     from rq import Worker
diff --git a/evaluation_dashboard_app/worker/tasks.py b/evaluation_dashboard_app/worker/tasks.py
index a27cb80..c72f75c 100644
--- a/evaluation_dashboard_app/worker/tasks.py
+++ b/evaluation_dashboard_app/worker/tasks.py
@@ -5,15 +5,42 @@
 
 import os
 import re
+import json
+import shutil
 import sys
-from typing import Any, Dict
+import time
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from pathlib import Path
+from typing import Any, Dict, Optional
+
+import yaml
 
 # App root on path for lib imports
 _APP_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 if _APP_ROOT not in sys.path:
     sys.path.insert(0, _APP_ROOT)
 
-from lib.db import update_task_status, update_task_progress, append_task_log, update_task_result_summary
+from lib.db import (
+    append_task_log,
+    get_task,
+    update_task_progress,
+    update_task_result_summary,
+    update_task_status,
+)
+from lib.run_metadata import (
+    read_run_metadata,
+    resolve_run_directory_from_task_parameters,
+    upsert_run_metadata,
+)
+from lib.specsheet_report import write_trend_metadata
+
+_RELEASE_PERFORMANCE_CATALOG_ID = "e36d75b9-6c3a-4970-9b9b-5cd13f7a9da3"
+_RELEASE_PERFORMANCE_INTEGRATION_ID = "96ad8fba-0228-4c2b-9166-07d4de1a0760"
+_RELEASE_DEVOPS_CATALOG_ID = "ab0f8498-cc1b-4726-836f-e18e8bcb3200"
+_RELEASE_DEVOPS_INTEGRATION_ID = "295cff78-9bc9-4d60-b7aa-f95be6ff96a4"
+_RELEASE_OPTIONAL_CATALOG_ID = "09039022-ec91-41bf-9e93-fdefccdfc9bc"
+_RELEASE_SKIP_LARGE_FILE = True
+_RELEASE_LARGE_FILE_MB = 50.0
 
 # Optional imports for tasks that need them
 def _import_eval_summary():
@@ -28,14 +55,330 @@ def _import_catalog_io():
         return None
 
 
+def _parquet_progress_callback(
+    task_id: str,
+    *,
+    prefix: str = "Parquet",
+    pct_start: float = 0.0,
+    pct_end: float = 100.0,
+):
+    """Return a pkl-file progress callback for pkl_archive_to_parquet."""
+
+    def _on_progress(done: int, total: int) -> None:
+        total_safe = max(1, int(total or 0))
+        done_safe = min(max(0, int(done or 0)), total_safe)
+        pct = pct_start + (done_safe / total_safe) * max(0.0, pct_end - pct_start)
+        message = f"{prefix}: processing pkl files {done_safe}/{total_safe}"
+        update_task_progress(task_id, message=message, pct=min(pct_end, pct))
+        append_task_log(task_id, message)
+
+    return _on_progress
+
+
+def _eval_worker_count(parameters: Dict[str, Any], total: int) -> int:
+    """Resolve bounded eval concurrency. Defaults to 4, capped by total dirs."""
+    if total <= 0:
+        return 1
+    raw = parameters.get("eval_workers", os.environ.get("EVAL_WORKERS_DEFAULT", 4))
+    try:
+        workers = int(raw)
+    except (TypeError, ValueError):
+        workers = 4
+    try:
+        max_workers = int(os.environ.get("EVAL_WORKERS_MAX", 16))
+    except ValueError:
+        max_workers = 16
+    return max(1, min(workers, max_workers, total))
+
+
+def _compact_eval_path(path: Any, *, parts: int = 2) -> str:
+    """Return a readable tail path for task logs without flooding the UI."""
+    text = str(path or "").strip()
+    if not text:
+        return "unknown"
+    try:
+        p = Path(text)
+        tail = p.parts[-parts:]
+        return "/".join(tail) if tail else text
+    except Exception:
+        return text
+
+
+def _run_eval_result_dirs(
+    *,
+    task_id: str,
+    eval_summary: Any,
+    target_dirs: list[str],
+    overwrite: bool,
+    eval_workers: int,
+    pct_start: float,
+    pct_end: float,
+    label: str = "Eval",
+) -> list[Dict[str, Any]]:
+    """Run eval_result across result dirs with bounded concurrency and calm progress."""
+    total = len(target_dirs)
+    if total <= 0:
+        update_task_progress(task_id, message=f"{label}: no result directories found", pct=pct_end)
+        return []
+
+    workers = max(1, min(int(eval_workers or 1), total))
+    span = max(0.0, pct_end - pct_start)
+    statuses: list[Dict[str, Any]] = []
+    counts = {"success": 0, "skipped": 0, "failed": 0}
+
+    def _record(status: Dict[str, Any]) -> str:
+        statuses.append(status)
+        state = str(status.get("status") or "failed")
+        if state not in counts:
+            state = "failed"
+        counts[state] += 1
+        if state == "failed":
+            append_task_log(
+                task_id,
+                f"{label}: eval failed for {status.get('path', '')}: {status.get('detail', '')}",
+            )
+        return state
+
+    def _progress(done: int, latest: str | None = None) -> None:
+        pct = pct_start + (done / total) * span
+        latest_text = f" latest: {latest}" if latest else ""
+        update_task_progress(
+            task_id,
+            message=(
+                f"{label}: completed {done}/{total} dirs "
+                f"(success {counts['success']}, skipped {counts['skipped']}, failed {counts['failed']})"
+                f"{latest_text}"
+            ),
+            pct=min(pct_end, pct),
+        )
+
+    append_task_log(task_id, f"{label}: running eval_result for {total} directories with {workers} worker(s)")
+    _progress(0)
+
+    if workers == 1:
+        for i, result_dir in enumerate(target_dirs, start=1):
+            append_task_log(task_id, f"{label}: starting {i}/{total}: {result_dir}")
+            status = eval_summary.run_eval_result_for_dir(result_dir, overwrite=overwrite)
+            state = _record(status)
+            short_path = _compact_eval_path(status.get("path") or result_dir)
+            append_task_log(task_id, f"{label}: {i}/{total} {state}: {short_path}")
+            _progress(i, short_path)
+        return statuses
+
+    with ThreadPoolExecutor(max_workers=workers) as executor:
+        future_map = {
+            executor.submit(eval_summary.run_eval_result_for_dir, result_dir, overwrite=overwrite): result_dir
+            for result_dir in target_dirs
+        }
+        for done, future in enumerate(as_completed(future_map), start=1):
+            result_dir = future_map[future]
+            try:
+                status = future.result()
+            except Exception as exc:
+                status = {"path": result_dir, "status": "failed", "detail": str(exc)}
+            state = _record(status)
+            short_path = _compact_eval_path(status.get("path") or result_dir)
+            append_task_log(task_id, f"{label}: {done}/{total} {state}: {short_path}")
+            _progress(done, short_path)
+    return statuses
+
+
+def _copy_task_parameters(parameters: Dict[str, Any]) -> Dict[str, Any]:
+    copied: Dict[str, Any] = {}
+    for key, value in (parameters or {}).items():
+        if isinstance(value, (dict, list, tuple, str, int, float, bool)) or value is None:
+            copied[key] = value
+        else:
+            copied[key] = str(value)
+    return copied
+
+
+def _resolve_active_integration_id(api: Any, project_id: str, catalog_id: str) -> str:
+    """Resolve latest active integration for a catalog when the UI only provided a catalog id."""
+    url = f"{api.api_base_url}/projects/{project_id}/integrations"
+    response = api.request(url, {"catalog_id": catalog_id, "size": 100}, method="GET")
+    if response is None:
+        raise RuntimeError(f"No response returned while loading integrations for catalog {catalog_id}.")
+    if response.status_code != 200:
+        raise RuntimeError(
+            f"Failed to load integrations for catalog {catalog_id}: status={response.status_code}"
+        )
+    payload = json.loads(response.content)
+    integrations = payload.get("integrations", []) or []
+    active = [
+        item for item in integrations
+        if isinstance(item, dict)
+        and str(item.get("catalog_id") or "").strip() == catalog_id
+        and not bool(item.get("deleted"))
+    ]
+    if not active:
+        raise RuntimeError(f"No active integration found for catalog {catalog_id}.")
+
+    def _sort_key(item: Dict[str, object]) -> tuple:
+        return (
+            str(item.get("updated_at") or ""),
+            int(item.get("version_id") or 0),
+            str(item.get("id") or ""),
+        )
+
+    active.sort(key=_sort_key, reverse=True)
+    return str(active[0].get("id") or "").strip()
+
+
+def _task_row_payload(task_id: str) -> Dict[str, Any]:
+    row = get_task(task_id) or {}
+    return {
+        "id": str(row.get("id") or task_id),
+        "type": str(row.get("type") or "").strip(),
+        "status": str(row.get("status") or "").strip(),
+        "requested_by": str(row.get("session_id") or "").strip(),
+        "created_at": row.get("created_at"),
+        "updated_at": row.get("updated_at"),
+        "result_path": str(row.get("result_path") or "").strip(),
+        "error_message": str(row.get("error_message") or "").strip(),
+        "progress_message": str(row.get("progress_message") or "").strip(),
+        "progress_pct": row.get("progress_pct"),
+    }
+
+
+def _task_request_payload(parameters: Dict[str, Any]) -> Dict[str, Any]:
+    params = _copy_task_parameters(parameters)
+    return {
+        "environment": str(params.get("environment") or "default").strip() or "default",
+        "project_id": str(params.get("project_id") or "").strip(),
+        "job_id": str(params.get("job_id") or "").strip(),
+        "catalog_id": str(params.get("catalog_id") or "").strip(),
+        "integration_id": str(params.get("integration_id") or "").strip(),
+        "source_job_id": str(params.get("source_job_id") or "").strip(),
+        "target_name": str(params.get("target_name") or "").strip(),
+        "description": str(params.get("description") or "").strip(),
+        "suite_id": str(params.get("suite_id") or "").strip(),
+        "suite_ids": list(params.get("suite_ids") or []),
+        "download_type": str(params.get("download_type") or "").strip(),
+        "phase": str(params.get("phase") or "").strip(),
+        "skip_large_file": bool(params.get("skip_large_file", False)),
+        "large_file_mb": params.get("large_file_mb"),
+        "keep_zip_files": bool(params.get("keep_zip_files", False)),
+        "run_eval": bool(params.get("run_eval", False)),
+        "generate_parquet": bool(params.get("generate_parquet", False)),
+        "eval_recursive": bool(params.get("eval_recursive", False)),
+        "eval_overwrite": bool(params.get("eval_overwrite", False)),
+        "max_retries": params.get("max_retries"),
+        "clean_build": bool(params.get("clean_build", False)),
+        "debug": bool(params.get("debug", False)),
+        "is_tag": bool(params.get("is_tag", False)),
+        "scenario_name_filter": str(params.get("scenario_name_filter") or "").strip(),
+        "selected_ids": list(params.get("selected_ids") or []),
+        "output_path": str(
+            params.get("output_path")
+            or params.get("output_dir")
+            or params.get("eval_root")
+            or params.get("pkl_dir")
+            or ""
+        ).strip(),
+        "parameters": params,
+    }
+
+
+def _build_run_metadata_patch(task_id: str, parameters: Dict[str, Any], *, task_type: str) -> Dict[str, Any]:
+    return {
+        "source_mode": task_type,
+        "task": _task_row_payload(task_id),
+        "request": _task_request_payload(parameters),
+    }
+
+
+def _update_run_metadata(
+    task_id: str,
+    parameters: Dict[str, Any],
+    *,
+    task_type: str,
+    create_missing: bool = False,
+    extra: Optional[Dict[str, Any]] = None,
+) -> None:
+    run_dir = resolve_run_directory_from_task_parameters(parameters, create_missing=create_missing)
+    if run_dir is None:
+        return
+    patch = _build_run_metadata_patch(task_id, parameters, task_type=task_type)
+    if extra:
+        patch.update(extra)
+    try:
+        upsert_run_metadata(run_dir, patch, create_missing=create_missing)
+    except Exception:
+        pass
+
+
+def _append_run_event(
+    task_id: str,
+    parameters: Dict[str, Any],
+    *,
+    task_type: str,
+    message: str,
+) -> None:
+    run_dir = resolve_run_directory_from_task_parameters(parameters, create_missing=False)
+    if run_dir is None:
+        return
+    try:
+        metadata = read_run_metadata(run_dir)
+        events = list(metadata.get("events") or [])
+        events.append({"at": _task_row_payload(task_id).get("updated_at"), "message": message})
+        if len(events) > 50:
+            events = events[-50:]
+        upsert_run_metadata(
+            run_dir,
+            {
+                "events": events,
+                "task": _task_row_payload(task_id),
+            },
+            create_missing=False,
+        )
+    except Exception:
+        pass
+
+
+def _mark_run_status(
+    task_id: str,
+    parameters: Dict[str, Any],
+    *,
+    task_type: str,
+    status: str,
+    error_message: str = "",
+    result_path: str = "",
+    extra: Optional[Dict[str, Any]] = None,
+    create_missing: bool = False,
+) -> None:
+    patch: Dict[str, Any] = {
+        "task": {
+            "status": status,
+        }
+    }
+    if error_message:
+        patch["task"]["error_message"] = error_message
+    if result_path:
+        patch["task"]["result_path"] = result_path
+    if extra:
+        patch.update(extra)
+    _update_run_metadata(
+        task_id,
+        parameters,
+        task_type=task_type,
+        create_missing=create_missing,
+        extra=patch,
+    )
+
+
 def job_generate_summary_csv(task_id: str, parameters: Dict[str, Any]) -> None:
     """Generate Summary.csv and Score.csv under eval_root."""
     update_task_status(task_id, "running")
     append_task_log(task_id, "Starting generate_summary_csv")
+    _mark_run_status(task_id, parameters, task_type="generate_summary_csv", status="running")
     try:
         eval_summary = _import_eval_summary()
         eval_root = parameters.get("eval_root")
         if not eval_root:
+            _mark_run_status(
+                task_id, parameters, task_type="generate_summary_csv", status="failed", error_message="Missing eval_root"
+            )
             update_task_status(task_id, "failed", error_message="Missing eval_root")
             return
         append_task_log(task_id, f"Generating summary under {eval_root}")
@@ -50,10 +393,28 @@ def job_generate_summary_csv(task_id: str, parameters: Dict[str, Any]) -> None:
                 "score_rows": info.get("score_rows", 0),
             },
         )
+        _update_run_metadata(
+            task_id,
+            parameters,
+            task_type="generate_summary_csv",
+            extra={
+                "evaluation": {
+                    "summary_path": result_path,
+                    "summary_rows": info.get("summary_rows", 0),
+                    "score_rows": info.get("score_rows", 0),
+                }
+            },
+        )
         append_task_log(task_id, f"Done. Output: {result_path}")
+        _mark_run_status(
+            task_id, parameters, task_type="generate_summary_csv", status="completed", result_path=str(result_path or "")
+        )
         update_task_status(task_id, "completed", result_path=result_path)
     except Exception as e:
         append_task_log(task_id, f"Failed: {e}")
+        _mark_run_status(
+            task_id, parameters, task_type="generate_summary_csv", status="failed", error_message=str(e)
+        )
         update_task_status(task_id, "failed", error_message=str(e))
         raise
 
@@ -62,40 +423,76 @@ def job_run_eval_dirs(task_id: str, parameters: Dict[str, Any]) -> None:
     """Run eval_result for each dir under eval_root, then generate Summary/Score CSV."""
     update_task_status(task_id, "running")
     append_task_log(task_id, "Starting run_eval_dirs")
+    _mark_run_status(task_id, parameters, task_type="run_eval_dirs", status="running")
     try:
         eval_summary = _import_eval_summary()
         eval_root = parameters.get("eval_root")
         recursive = parameters.get("recursive", True)
         overwrite = parameters.get("overwrite", False)
         if not eval_root:
+            _mark_run_status(task_id, parameters, task_type="run_eval_dirs", status="failed", error_message="Missing eval_root")
             update_task_status(task_id, "failed", error_message="Missing eval_root")
             return
         target_dirs = eval_summary.find_eval_result_dirs(eval_root, recursive=recursive)
         if not target_dirs:
+            _mark_run_status(
+                task_id, parameters, task_type="run_eval_dirs", status="failed", error_message="No result directories found"
+            )
             update_task_status(task_id, "failed", error_message="No result directories found")
             return
         total = len(target_dirs)
-        append_task_log(task_id, f"Processing {total} directories")
-        for i, result_dir in enumerate(target_dirs):
-            pct = 100.0 * (i + 1) / total if total else 0
-            update_task_progress(task_id, message=f"Processing {i+1}/{total}: {result_dir}", pct=pct)
-            append_task_log(task_id, f"Processing {i+1}/{total}: {result_dir}")
-            eval_summary.run_eval_result_for_dir(result_dir, overwrite=overwrite)
+        eval_workers = _eval_worker_count(parameters, total)
+        statuses = _run_eval_result_dirs(
+            task_id=task_id,
+            eval_summary=eval_summary,
+            target_dirs=target_dirs,
+            overwrite=overwrite,
+            eval_workers=eval_workers,
+            pct_start=0.0,
+            pct_end=90.0,
+            label="Eval",
+        )
         append_task_log(task_id, "Generating summary CSV")
+        update_task_progress(task_id, message="Generating Summary.csv / Score.csv", pct=95)
         info = eval_summary.generate_summary_and_score_csv(eval_root)
         result_path = info.get("summary_path", eval_root)
+        failed = [s for s in statuses if s.get("status") == "failed"]
+        skipped = [s for s in statuses if s.get("status") == "skipped"]
+        succeeded = [s for s in statuses if s.get("status") == "success"]
         summary = {
             "job": "run_eval_dirs",
             "directories_processed": total,
+            "success": len(succeeded),
+            "failed": len(failed),
+            "skipped": len(skipped),
             "summary_path": result_path,
             "summary_rows": info.get("summary_rows", 0),
             "score_rows": info.get("score_rows", 0),
         }
         update_task_result_summary(task_id, summary)
+        _update_run_metadata(
+            task_id,
+            parameters,
+            task_type="run_eval_dirs",
+            extra={
+                "evaluation": {
+                    "directories_processed": total,
+                    "success": len(succeeded),
+                    "failed": len(failed),
+                    "skipped": len(skipped),
+                    "summary_path": result_path,
+                    "summary_rows": info.get("summary_rows", 0),
+                    "score_rows": info.get("score_rows", 0),
+                }
+            },
+        )
         append_task_log(task_id, f"Done. Output: {result_path}")
+        update_task_progress(task_id, message="Eval complete", pct=100)
+        _mark_run_status(task_id, parameters, task_type="run_eval_dirs", status="completed", result_path=result_path)
         update_task_status(task_id, "completed", result_path=result_path)
     except Exception as e:
         append_task_log(task_id, f"Failed: {e}")
+        _mark_run_status(task_id, parameters, task_type="run_eval_dirs", status="failed", error_message=str(e))
         update_task_status(task_id, "failed", error_message=str(e))
         raise
 
@@ -104,30 +501,50 @@ def job_build_parquet(task_id: str, parameters: Dict[str, Any]) -> None:
     """Build scene_result parquet from pkl directory."""
     update_task_status(task_id, "running")
     append_task_log(task_id, "Starting build_parquet")
+    _mark_run_status(task_id, parameters, task_type="build_parquet", status="running")
     try:
         pkl_archive_to_parquet = _import_catalog_io()
         if pkl_archive_to_parquet is None:
+            _mark_run_status(
+                task_id, parameters, task_type="build_parquet", status="failed", error_message="perception_catalog_io not available"
+            )
             update_task_status(task_id, "failed", error_message="perception_catalog_io not available")
             return
         pkl_dir = parameters.get("pkl_dir")
         if not pkl_dir:
+            _mark_run_status(task_id, parameters, task_type="build_parquet", status="failed", error_message="Missing pkl_dir")
             update_task_status(task_id, "failed", error_message="Missing pkl_dir")
             return
         append_task_log(task_id, f"Building parquet from {pkl_dir}")
+        update_task_progress(task_id, message=f"Parquet: scanning pkl files in {pkl_dir}", pct=0)
         project_id = parameters.get("project_id")
         job_id = parameters.get("job_id")
         parquet_path = pkl_archive_to_parquet(
             pkl_dir,
-            on_progress=None,
-            on_skip=None,
+            on_progress=_parquet_progress_callback(task_id, pct_start=5, pct_end=95),
+            on_skip=lambda path, reason: append_task_log(task_id, f"Parquet skipped {path}: {reason}"),
             project_id=project_id,
             job_id=job_id,
         )
+        update_task_progress(task_id, message="Parquet: writing output complete", pct=100)
         update_task_result_summary(task_id, {"job": "build_parquet", "output_path": parquet_path})
+        _update_run_metadata(
+            task_id,
+            parameters,
+            task_type="build_parquet",
+            extra={
+                "parquet": {
+                    "enabled": True,
+                    "path": parquet_path,
+                }
+            },
+        )
         append_task_log(task_id, f"Done. Output: {parquet_path}")
+        _mark_run_status(task_id, parameters, task_type="build_parquet", status="completed", result_path=parquet_path)
         update_task_status(task_id, "completed", result_path=parquet_path)
     except Exception as e:
         append_task_log(task_id, f"Failed: {e}")
+        _mark_run_status(task_id, parameters, task_type="build_parquet", status="failed", error_message=str(e))
         update_task_status(task_id, "failed", error_message=str(e))
         raise
 
@@ -144,15 +561,231 @@ def _progress_callback(task_id: str, message: str) -> None:
         update_task_progress(task_id, message=message)
 
 
+def _is_failed_case_status(case_report: Dict[str, Any]) -> bool:
+    """Return True when the case status (nested ``result.status`` first, then top-level) is a failure-like value."""
+    raw_status = (case_report.get("result") or {}).get("status") or case_report.get("status") or ""
+    failure_states = ("failed", "failure", "error", "timed_out", "timeout", "canceled", "cancelled", "aborted")
+    return raw_status.strip().lower() in failure_states
+
+
+def _summarize_suite_reports(suite_rows: Any, *, limit: int = 10) -> list[Dict[str, Any]]:
+    """Normalize suite rows into at most ``limit`` compact dicts, most failures first (ties ordered by suite name)."""
+    normalized = [
+        {
+            "suite_name": str(row.get("name") or ""),
+            "total": int(row.get("all", 0) or 0),
+            "success": int(row.get("success", 0) or 0),
+            "failed": int(row.get("fail", 0) or 0),
+            "canceled": int(row.get("cancel", 0) or 0),
+            "simulation": row.get("simulation", ""),
+            "url": row.get("url", ""),
+        }
+        for row in suite_rows or []
+    ]
+    # A null ``name`` is stored as "" above: a None in the sort key would raise TypeError when failure counts tie.
+    normalized.sort(key=lambda item: (-item["failed"], item["suite_name"]))
+    return normalized[:limit]
+
+
+def _suite_case_totals(suite_rows: Any) -> Dict[str, int]:
+    """Sum the ``all``/``success``/``fail``/``cancel`` counters over every suite row (missing or null counts as 0)."""
+    # Output key -> counter name used in the suite rows.
+    column_for = {"total": "all", "success": "success", "failed": "fail", "canceled": "cancel"}
+    sums = dict.fromkeys(column_for, 0)
+    for suite in suite_rows or []:
+        for out_key, column in column_for.items():
+            sums[out_key] += int(suite.get(column, 0) or 0)
+    return sums
+
+
+def _extract_failed_case_details(case_reports: Any, *, limit: int = 12) -> list[Dict[str, Any]]:
+    """Return up to ``limit`` failed cases, sorted by suite, scenario and message, as compact dicts for UI/log display."""
+    failed = []
+    for report in (item for item in case_reports or [] if _is_failed_case_status(item)):
+        logs = report.get("logs") or {}
+        failed.append(
+            {
+                "scenario_name": (report.get("scenario") or {}).get("display_name") or "",
+                "suite_name": (report.get("suite") or {}).get("display_name") or "",
+                "status": report.get("status") or (report.get("result") or {}).get("status") or "",  # nested fallback
+                "fail_message": report.get("fail_message") or "",
+                "failure_cause_labels": report.get("failure_cause_labels", []),
+                "archive_log_id": (logs.get("simulation_archive") or {}).get("id", ""),
+                "result_json_log_id": (logs.get("simulation_result_json") or {}).get("id", ""),
+            }
+        )
+    # Null names/messages were stored as "" above, so the tuple sort never compares None with str.
+    failed.sort(key=lambda item: (item["suite_name"], item["scenario_name"], item["fail_message"]))
+    return failed[:limit]
+
+
+def _extract_git_target_from_report(report: Dict[str, Any]) -> str:
+    """Return the branch/tag name from ``event.source.git_ref``, else the raw ref, else a 12-char ``git_sha`` prefix."""
+    event = report.get("event") or {}
+    source = event.get("source") or {}
+    ref = str(source.get("git_ref") or "").strip()
+    for prefix in ("refs/heads/", "refs/tags/"):
+        if ref.startswith(prefix):
+            return ref[len(prefix):]
+    return ref if ref else str(source.get("git_sha") or "").strip()[:12]
+
+
+def _extract_job_title_from_report(report: Dict[str, Any]) -> str:
+    """Use the report description as the title; otherwise build a placeholder from the first available timestamp."""
+    title = str(report.get("description") or "").strip()
+    if not title:
+        stamp = next((report[key] for key in ("started_at", "scheduled_at", "finished_at") if report.get(key)), "unknown start")
+        title = f"no description ({stamp})"
+    return title
+
+
+def _extract_catalog_url_from_report(report: Dict[str, Any]) -> str:
+    """Return the catalog URL carried by the report, else derive one from the project and catalog ids.
+
+    Returns "" when neither a direct URL nor both ids are available.
+    """
+    catalog = report.get("catalog") or {}
+    # First non-empty candidate wins; the value is stringified and trimmed before use.
+    candidate = next((catalog[key] for key in ("web_url", "url", "catalog_url") if catalog.get(key)), "")
+    direct_url = str(candidate).strip()
+    if direct_url:
+        return direct_url
+    project_id = str(report.get("project_id") or "").strip()
+    catalog_id = str(catalog.get("catalog_id") or catalog.get("id") or "").strip()
+    if not (project_id and catalog_id):
+        return ""
+    return f"https://evaluation.tier4.jp/evaluation/vehicle_catalogs/{catalog_id}?project_id={project_id}"
+
+
+def _extract_source_metadata_from_report(report: Dict[str, Any]) -> Dict[str, str]:
+    """Collect title, git target and source-repository links from a report so a run can be rendered without refetching."""
+    source = (report.get("event") or {}).get("source") or {}
+    repo_url = str(source.get("git_web_url") or source.get("git_url") or "").strip()
+    metadata = {
+        "title": _extract_job_title_from_report(report),
+        "target": _extract_git_target_from_report(report),
+    }
+    for field in ("git_sha", "git_ref_url", "git_commit_url"):
+        metadata[field] = str(source.get(field) or "").strip()
+    metadata["source_url"] = repo_url
+    metadata["source_repo_label"] = repo_url.rstrip("/").split("/")[-1] if repo_url else ""  # last URL path segment
+    return metadata
+
+
+def _build_evaluator_result_summary(
+    *,
+    job_id: str,
+    report_url: str,
+    evaluator_status: str,
+    final_report: Dict[str, Any],
+    suite_rows: Any = None,
+    failed_cases: Any = None,
+) -> Dict[str, Any]:
+    """Assemble the flat ``evaluator_*`` summary dict that the task detail UI renders.
+
+    Case totals are summed from ``suite_rows``; if every sum is zero, the report's own test counters are used.
+    """
+    catalog = final_report.get("catalog") or {}
+    build_info = final_report.get("build") or {}
+    test_info = final_report.get("test") or {}
+    reported = test_info.get("available_case_results") or test_info.get("case_results") or {}
+    totals = _suite_case_totals(suite_rows)
+    source_meta = _extract_source_metadata_from_report(final_report)
+    if not any(totals.values()):
+        counter_for = {"total": "total_count", "success": "success_count", "failed": "failure_count", "canceled": "cancellation_count"}
+        totals = {key: int(reported.get(counter, 0) or 0) for key, counter in counter_for.items()}
+    return {
+        "evaluator_job_id": job_id,
+        "evaluator_report_url": report_url,
+        "evaluator_status": evaluator_status,
+        "evaluator_scheduled_by": final_report.get("scheduled_by", ""),
+        "evaluator_catalog_id": catalog.get("id") or "",
+        "evaluator_catalog_name": catalog.get("display_name") or "",
+        "evaluator_catalog_version_id": catalog.get("version_id") or "",
+        "evaluator_catalog_url": _extract_catalog_url_from_report(final_report),
+        "evaluator_title": source_meta.get("title", ""),
+        "evaluator_target": source_meta.get("target", ""),
+        "evaluator_git_sha": source_meta.get("git_sha", ""),
+        "evaluator_git_ref_url": source_meta.get("git_ref_url", ""),
+        "evaluator_git_commit_url": source_meta.get("git_commit_url", ""),
+        "evaluator_source_url": source_meta.get("source_url", ""),
+        "evaluator_source_repo_label": source_meta.get("source_repo_label", ""),
+        "evaluator_build_status": build_info.get("status", ""),
+        "evaluator_test_status": test_info.get("status", ""),
+        "evaluator_fail_message": final_report.get("fail_message", ""),
+        "evaluator_case_totals": totals,
+        "evaluator_suites": _summarize_suite_reports(suite_rows),
+        "evaluator_failed_cases": _extract_failed_case_details(failed_cases),
+    }
+
+
+def _fetch_evaluator_context(
+    *,
+    project_id: str,
+    job_id: str,
+    environment: str,
+) -> Dict[str, Any]:
+    """Fetch evaluator job metadata for a task that starts from an existing job; returns {} on missing ids or any error."""
+    if not (project_id and job_id):
+        return {}
+    try:
+        from lib import evaluator_api
+
+        os.environ["AUTH_PROFILE"] = environment or "default"  # presumably read by the API client - TODO confirm
+        report = evaluator_api.EvaluationRunAPI().get_job_status(project_id, job_id)
+        job_status = evaluator_api.extract_job_status(report)
+        catalog = report.get("catalog") or {}
+        build_info = report.get("build") or {}
+        test_info = report.get("test") or {}
+        counters = test_info.get("available_case_results") or test_info.get("case_results") or {}
+        source_meta = _extract_source_metadata_from_report(report)
+        return {
+            "job_id": job_id,
+            "report_url": evaluator_api.get_job_report_url(project_id, job_id),
+            "status": job_status,
+            "scheduled_by": str(report.get("scheduled_by") or "").strip(),
+            "catalog_id": str(catalog.get("id") or "").strip(),
+            "catalog_name": str(catalog.get("display_name") or "").strip(),
+            "catalog_version_id": catalog.get("version_id"),
+            "catalog_url": _extract_catalog_url_from_report(report),
+            "title": source_meta.get("title", ""),
+            "target": source_meta.get("target", ""),
+            "git_sha": source_meta.get("git_sha", ""),
+            "git_ref_url": source_meta.get("git_ref_url", ""),
+            "git_commit_url": source_meta.get("git_commit_url", ""),
+            "source_url": source_meta.get("source_url", ""),
+            "source_repo_label": source_meta.get("source_repo_label", ""),
+            "build_status": str(build_info.get("status") or "").strip(),
+            "test_status": str(test_info.get("status") or "").strip(),
+            "fail_message": str(report.get("fail_message") or "").strip(),
+            "case_totals": {
+                "total": int(counters.get("total_count", 0) or 0),
+                "success": int(counters.get("success_count", 0) or 0),
+                "failed": int(counters.get("failure_count", 0) or 0),
+                "canceled": int(counters.get("cancellation_count", 0) or 0),
+            },
+        }
+    except Exception:
+        return {}
+
+
 def job_download_results(task_id: str, parameters: Dict[str, Any]) -> None:
     """Download job results (archives or result JSON) and extract/organize. Requires auth."""
     update_task_status(task_id, "running")
     append_task_log(task_id, "Starting download_results")
+    _mark_run_status(
+        task_id,
+        parameters,
+        task_type="download_results",
+        status="running",
+        create_missing=True,
+    )
     try:
         from lib import download_core  # noqa: F401
         output_path = parameters.get("output_path")
         project_id = parameters.get("project_id")
         job_id = parameters.get("job_id")
+        environment = str(parameters.get("environment") or "default").strip() or "default"
         suite_id = parameters.get("suite_id")
         suite_ids = parameters.get("suite_ids")  # optional list
         download_type = parameters.get("download_type", "archives")  # archives | result_json
@@ -161,8 +794,25 @@ def job_download_results(task_id: str, parameters: Dict[str, Any]) -> None:
         large_file_mb = float(parameters.get("large_file_mb", 50.0))
         keep_zip_files = parameters.get("keep_zip_files", False)
         if not all([output_path, project_id, job_id]):
+            _mark_run_status(
+                task_id,
+                parameters,
+                task_type="download_results",
+                status="failed",
+                error_message="Missing output_path, project_id, or job_id",
+                create_missing=True,
+            )
             update_task_status(task_id, "failed", error_message="Missing output_path, project_id, or job_id")
             return
+        evaluator_context = _fetch_evaluator_context(project_id=project_id, job_id=job_id, environment=environment)
+        if evaluator_context:
+            _update_run_metadata(
+                task_id,
+                parameters,
+                task_type="download_results",
+                create_missing=True,
+                extra={"evaluator": evaluator_context},
+            )
         on_progress = lambda msg: _progress_callback(task_id, msg)
         on_warning = lambda msg: append_task_log(task_id, msg)
         failure_count, total_attempted, rows = download_core.run_download_results(
@@ -189,22 +839,67 @@ def job_download_results(task_id: str, parameters: Dict[str, Any]) -> None:
             "rows": rows[:500],
         }
         update_task_result_summary(task_id, summary)
+        _update_run_metadata(
+            task_id,
+            parameters,
+            task_type="download_results",
+            create_missing=True,
+            extra={
+                "download": {
+                    "mode": "download_results",
+                    "total": total_attempted,
+                    "success": success_count,
+                    "failed": failure_count,
+                    "rows": rows[:100],
+                    "download_type": download_type,
+                    "phase": phase,
+                    "skip_large_file": bool(skip_large_file),
+                    "large_file_mb": large_file_mb,
+                    "keep_zip_files": bool(keep_zip_files),
+                }
+            },
+        )
         append_task_log(task_id, "Download and extract completed")
         if success_count == 0 and failure_count > 0:
             err_msg = f"Download completed with {failure_count} failures. See task log for details."
+            _mark_run_status(
+                task_id,
+                parameters,
+                task_type="download_results",
+                status="failed",
+                result_path=output_path,
+                error_message=err_msg,
+            )
             update_task_status(task_id, "failed", result_path=output_path, error_message=err_msg)
         else:
+            _mark_run_status(
+                task_id,
+                parameters,
+                task_type="download_results",
+                status="completed",
+                result_path=output_path,
+            )
             update_task_status(task_id, "completed", result_path=output_path)
     except ImportError:
+        _mark_run_status(
+            task_id,
+            parameters,
+            task_type="download_results",
+            status="failed",
+            error_message="Download worker not available: lib.download_core not implemented",
+            create_missing=True,
+        )
         update_task_status(
             task_id,
             "failed",
             error_message="Download worker not available: lib.download_core not implemented",
         )
     except NotImplementedError as e:
+        _mark_run_status(task_id, parameters, task_type="download_results", status="failed", error_message=str(e))
         update_task_status(task_id, "failed", error_message=str(e))
     except Exception as e:
         append_task_log(task_id, f"Failed: {e}")
+        _mark_run_status(task_id, parameters, task_type="download_results", status="failed", error_message=str(e))
         update_task_status(task_id, "failed", error_message=str(e))
         raise
 
@@ -213,19 +908,44 @@ def job_download_scenarios(task_id: str, parameters: Dict[str, Any]) -> None:
     """Download scenarios from job to output_dir. Requires auth."""
     update_task_status(task_id, "running")
     append_task_log(task_id, "Starting download_scenarios")
+    _mark_run_status(
+        task_id,
+        parameters,
+        task_type="download_scenarios",
+        status="running",
+        create_missing=True,
+    )
     try:
         from lib import download_core  # noqa: F401
         output_dir = parameters.get("output_dir") or parameters.get("output_path")
         project_id = parameters.get("project_id")
         job_id = parameters.get("job_id")
+        environment = str(parameters.get("environment") or "default").strip() or "default"
         suite_id = parameters.get("suite_id")
         suite_ids = parameters.get("suite_ids")
         overwrite = parameters.get("overwrite", False)
         scenario_name_filter = parameters.get("scenario_name_filter")
         selected_ids = parameters.get("selected_ids")
         if not all([output_dir, project_id, job_id]):
+            _mark_run_status(
+                task_id,
+                parameters,
+                task_type="download_scenarios",
+                status="failed",
+                error_message="Missing output_dir, project_id, or job_id",
+                create_missing=True,
+            )
             update_task_status(task_id, "failed", error_message="Missing output_dir, project_id, or job_id")
             return
+        evaluator_context = _fetch_evaluator_context(project_id=project_id, job_id=job_id, environment=environment)
+        if evaluator_context:
+            _update_run_metadata(
+                task_id,
+                parameters,
+                task_type="download_scenarios",
+                create_missing=True,
+                extra={"evaluator": evaluator_context},
+            )
         on_progress = lambda msg: _progress_callback(task_id, msg)
         on_warning = lambda msg: append_task_log(task_id, msg)
         failure_count, total_attempted, rows = download_core.run_download_scenarios(
@@ -250,22 +970,1573 @@ def job_download_scenarios(task_id: str, parameters: Dict[str, Any]) -> None:
             "rows": rows[:500],
         }
         update_task_result_summary(task_id, summary)
+        _update_run_metadata(
+            task_id,
+            parameters,
+            task_type="download_scenarios",
+            create_missing=True,
+            extra={
+                "scenario_download": {
+                    "total": total_attempted,
+                    "success": success_count,
+                    "failed": failure_count,
+                    "overwrite": bool(overwrite),
+                    "scenario_name_filter": str(scenario_name_filter or "").strip(),
+                    "selected_ids": list(selected_ids or []),
+                    "rows": rows[:100],
+                }
+            },
+        )
         append_task_log(task_id, "Download scenarios completed")
         if failure_count > 0:
             err_msg = f"Download completed with {failure_count} failures. See task log for details."
+            _mark_run_status(
+                task_id,
+                parameters,
+                task_type="download_scenarios",
+                status="failed",
+                result_path=output_dir,
+                error_message=err_msg,
+            )
             update_task_status(task_id, "failed", result_path=output_dir, error_message=err_msg)
         else:
+            _mark_run_status(
+                task_id,
+                parameters,
+                task_type="download_scenarios",
+                status="completed",
+                result_path=output_dir,
+            )
             update_task_status(task_id, "completed", result_path=output_dir)
     except ImportError:
+        _mark_run_status(
+            task_id,
+            parameters,
+            task_type="download_scenarios",
+            status="failed",
+            error_message="Download worker not available: lib.download_core not implemented",
+            create_missing=True,
+        )
         update_task_status(
             task_id,
             "failed",
             error_message="Download worker not available: lib.download_core not implemented",
         )
     except NotImplementedError as e:
+        _mark_run_status(task_id, parameters, task_type="download_scenarios", status="failed", error_message=str(e))
+        update_task_status(task_id, "failed", error_message=str(e))
+    except Exception as e:
+        append_task_log(task_id, f"Failed: {e}")
+        _mark_run_status(task_id, parameters, task_type="download_scenarios", status="failed", error_message=str(e))
+        update_task_status(task_id, "failed", error_message=str(e))
+        raise
+
+
+def job_download_and_eval(task_id: str, parameters: Dict[str, Any]) -> None:
+    """Download job results, then optionally run eval and parquet generation, recording status along the way.
+
+    A failed download marks the task failed. Errors reported by later stages are logged and stored on the
+    run, but the task is still marked completed. Unexpected exceptions are recorded and re-raised.
+    """
+    update_task_status(task_id, "running")
+    append_task_log(task_id, "Starting download_and_eval combined workflow")
+    _mark_run_status(
+        task_id,
+        parameters,
+        task_type="download_and_eval",
+        status="running",
+        create_missing=True,
+    )
+    try:
+        from lib import download_core
+        output_path = parameters.get("output_path")
+        project_id = parameters.get("project_id")
+        job_id = parameters.get("job_id")
+        environment = str(parameters.get("environment") or "default").strip() or "default"
+        suite_id = parameters.get("suite_id")
+        suite_ids = parameters.get("suite_ids")
+        download_type = parameters.get("download_type", "archives")
+        phase = parameters.get("phase", "perception.object_recognition.tracking.objects")
+        skip_large_file = parameters.get("skip_large_file", False)
+        large_file_mb = float(parameters.get("large_file_mb", 50.0))
+        keep_zip_files = parameters.get("keep_zip_files", False)
+        run_eval = parameters.get("run_eval", True)
+        generate_parquet = parameters.get("generate_parquet", True)
+        eval_recursive = parameters.get("eval_recursive", True)
+        eval_overwrite = parameters.get("eval_overwrite", False)
+        if not all([output_path, project_id, job_id]):
+            _mark_run_status(
+                task_id,
+                parameters,
+                task_type="download_and_eval",
+                status="failed",
+                error_message="Missing output_path, project_id, or job_id",
+                create_missing=True,
+            )
+            update_task_status(task_id, "failed", error_message="Missing output_path, project_id, or job_id")
+            return
+        evaluator_context = _fetch_evaluator_context(project_id=project_id, job_id=job_id, environment=environment)
+        if evaluator_context:
+            _update_run_metadata(
+                task_id,
+                parameters,
+                task_type="download_and_eval",
+                create_missing=True,
+                extra={"evaluator": evaluator_context},
+            )
+        # "n/m" counters in progress messages map onto one scale: download 0-60, eval 60-85, parquet 85-98.
+        def on_progress(msg: str) -> None:
+            append_task_log(task_id, msg)
+            match = re.search(r"(\d+)\s*/\s*(\d+)", msg)
+            pct = None
+            if match:
+                n, m = int(match.group(1)), max(1, int(match.group(2)))  # max() guards against division by zero
+                ratio = n / m
+                if msg.startswith("Eval:"):
+                    pct = 60.0 + ratio * 25.0
+                elif msg.startswith("Parquet:"):
+                    pct = 85.0 + ratio * 13.0
+                elif msg.startswith("Downloading"):
+                    pct = ratio * 60.0
+            if pct is None:
+                if msg.startswith("Download complete"):
+                    pct = 60.0
+                elif msg.startswith("Generating parquet"):
+                    pct = 85.0
+            if pct is None:
+                update_task_progress(task_id, message=msg)
+            else:
+                update_task_progress(task_id, message=msg, pct=pct)
+
+        on_warning = lambda msg: append_task_log(task_id, msg)
+        result = download_core.run_download_and_eval(
+            project_id=project_id,
+            job_id=job_id,
+            suite_id=suite_id,
+            output_path=output_path,
+            download_type=download_type,
+            phase=phase,
+            skip_large_file=skip_large_file,
+            large_file_mb=large_file_mb,
+            keep_zip_files=keep_zip_files,
+            suite_ids=suite_ids,
+            run_eval=run_eval,
+            generate_parquet=generate_parquet,
+            eval_recursive=eval_recursive,
+            eval_overwrite=eval_overwrite,
+            eval_workers=_eval_worker_count(parameters, 10_000),
+            on_progress=on_progress,
+            on_warning=on_warning,
+        )
+        # Build result summary
+        summary = {
+            "job": "download_and_eval",
+            "download_success": result.get("download_success", False),
+            "download_summary": result.get("download_summary", {}),
+            "eval_summary": result.get("eval_summary", {}),
+            "parquet_path": result.get("parquet_path", ""),
+            "errors": result.get("errors", []),
+        }
+        update_task_result_summary(task_id, summary)
+        _update_run_metadata(
+            task_id,
+            parameters,
+            task_type="download_and_eval",
+            create_missing=True,
+            extra={
+                "download": {
+                    "mode": "download_and_eval",
+                    **(result.get("download_summary", {}) or {}),
+                    "download_type": download_type,
+                    "phase": phase,
+                    "skip_large_file": bool(skip_large_file),
+                    "large_file_mb": large_file_mb,
+                    "keep_zip_files": bool(keep_zip_files),
+                },
+                "evaluation": {
+                    **(result.get("eval_summary", {}) or {}),
+                    "enabled": bool(run_eval),
+                    "recursive": bool(eval_recursive),
+                    "overwrite": bool(eval_overwrite),
+                },
+                "parquet": {
+                    "enabled": bool(generate_parquet),
+                    "path": result.get("parquet_path", ""),
+                },
+                "errors": list(result.get("errors", []) or []),
+            },
+        )
+
+        if not result.get("download_success"):
+            err_msg = str((result.get("errors") or ["Download failed"])[0])  # "errors" may be present but empty
+            append_task_log(task_id, f"Stopped: {err_msg}")
+            _mark_run_status(
+                task_id,
+                parameters,
+                task_type="download_and_eval",
+                status="failed",
+                result_path=output_path,
+                error_message=err_msg,
+            )
+            update_task_status(task_id, "failed", result_path=output_path, error_message=err_msg)
+        elif result.get("errors"):
+            # Partial success with some errors
+            errs = "; ".join(str(err) for err in result["errors"][:5])  # entries are not guaranteed to be strings
+            append_task_log(task_id, f"Completed with errors: {errs}")
+            _mark_run_status(
+                task_id,
+                parameters,
+                task_type="download_and_eval",
+                status="completed",
+                result_path=output_path,
+                error_message=errs,
+            )
+            update_task_status(task_id, "completed", result_path=output_path)
+        else:
+            append_task_log(task_id, "Download and eval completed successfully")
+            _mark_run_status(
+                task_id,
+                parameters,
+                task_type="download_and_eval",
+                status="completed",
+                result_path=output_path,
+            )
+            update_task_status(task_id, "completed", result_path=output_path)
+    except Exception as e:
+        append_task_log(task_id, f"Failed: {e}")
+        _mark_run_status(task_id, parameters, task_type="download_and_eval", status="failed", error_message=str(e))
+        update_task_status(task_id, "failed", error_message=str(e))
+        raise
+
+
+def _write_release_metadata_file(path: Path, metadata: Dict[str, Any]) -> Path:
+    """Write ``metadata`` to ``path`` as UTF-8 YAML (key order kept, parent directories created) and return ``path``."""
+    path.parent.mkdir(parents=True, exist_ok=True)
+    path.write_text(yaml.safe_dump(metadata, allow_unicode=True, sort_keys=False), encoding="utf-8")  # dump first: a failed dump must not truncate an existing file
+    return path
+
+
+def _build_devops_trend_summary_from_suites(rows: list[dict[str, Any]]) -> Dict[str, Any]:
+    """Convert per-suite pass counts into the ``{"DevOps": ...}`` payload used for the trend summary.
+
+    Suites with no cases are dropped. When the DevOps category mapping cannot be loaded, the flat
+    per-suite table is returned under "Suite pass rate"; otherwise suites are summed per mapped
+    sub-category and any suite the mapping does not mention is listed under "その他" / "未分類".
+    """
+    per_suite: dict[str, dict[str, int]] = {}
+    for row in rows or []:
+        name = str(row.get("name") or row.get("suite_name") or row.get("simulation") or "suite").strip()
+        total = int(row.get("all", 0) or row.get("total", 0) or 0)
+        passed = int(row.get("success", 0) or row.get("passed", 0) or 0)
+        if total <= 0:
+            # No explicit total: reconstruct it from the individual outcome counters.
+            failed = int(row.get("fail", 0) or row.get("failed", 0) or 0)
+            canceled = int(row.get("cancel", 0) or row.get("canceled", 0) or 0)
+            total = passed + failed + canceled
+        if total > 0:
+            per_suite[name] = {"passed": passed, "total": total}
+    if not per_suite:
+        return {"DevOps": {}}
+
+    try:
+        from perception_catalog_analyzer.path import DEVOPS_MAPPING_PATH
+
+        with Path(DEVOPS_MAPPING_PATH).open("r", encoding="utf-8") as fh:
+            mapping = yaml.safe_load(fh) or {}
+    except Exception:
+        mapping = {}
+
+    # No usable mapping: fall back to the flat per-suite table.
+    if not (isinstance(mapping, dict) and mapping):
+        return {"DevOps": {"Suite pass rate": per_suite}}
+
+    grouped: Dict[str, Any] = {}
+    seen: set[str] = set()
+    for major, mids in mapping.items():
+        if not isinstance(mids, dict):
+            continue
+        major_payload: Dict[str, Any] = {}
+        for mid, subs in mids.items():
+            if not isinstance(subs, dict):
+                continue
+            mid_payload: Dict[str, Any] = {}
+            for sub, names in subs.items():
+                if not isinstance(names, list):
+                    continue
+                # Only suites that actually reported results contribute to (and count as matched by) this sub-category.
+                hits = [str(entry) for entry in names if per_suite.get(str(entry))]
+                seen.update(hits)
+                mid_payload[str(sub)] = {
+                    "passed": sum(per_suite[hit]["passed"] for hit in hits),
+                    "total": sum(per_suite[hit]["total"] for hit in hits),
+                }
+            if mid_payload:
+                major_payload[str(mid)] = mid_payload
+        if major_payload:
+            grouped[str(major)] = major_payload
+
+    leftovers = {name: counts for name, counts in per_suite.items() if name not in seen}
+    if leftovers:
+        grouped.setdefault("その他", {})["未分類"] = leftovers
+
+    return {"DevOps": grouped}
+
+
+def _write_devops_trend_summary(path: Path, rows: list[dict[str, Any]]) -> Path | None:
+    """Write the DevOps trend summary for ``rows`` to ``path`` as JSON; return ``path``, or None when there is no data."""
+    payload = _build_devops_trend_summary_from_suites(rows)
+    if payload.get("DevOps"):
+        path.parent.mkdir(parents=True, exist_ok=True)
+        with path.open("w", encoding="utf-8") as out:
+            json.dump(payload, out, ensure_ascii=False, indent=2)
+    return path if payload.get("DevOps") else None
+
+
+def _suite_rows_from_existing_devops_summary(path: Path) -> list[dict[str, Any]]:
+    """Rebuild suite rows from a summary file using the flat "Suite pass rate" layout; [] if missing or malformed."""
+    if not path.exists():
+        return []
+    try:
+        payload = json.loads(path.read_text(encoding="utf-8"))
+    except Exception:
+        return []
+    # Walk down the two expected levels, bailing out as soon as one is not a mapping.
+    node: Any = payload
+    for key in ("DevOps", "Suite pass rate"):
+        if not isinstance(node, dict):
+            return []
+        node = node.get(key)
+    if not isinstance(node, dict):
+        return []
+    return [
+        {
+            "suite_name": str(suite_name),
+            "success": int(counts.get("passed", 0) or 0),
+            "total": int(counts.get("total", 0) or 0),
+        }
+        for suite_name, counts in node.items()
+        if isinstance(counts, dict)
+    ]
+
+
+def _has_release_download_artifacts(path: Path) -> bool:
+    """Return True when ``path`` contains downloaded case artifacts at any depth.
+
+    An artifact is a file named ``scene_result.pkl`` or any entry matching
+    ``*.pkl.z``; the search is recursive and stops at the first hit.
+    """
+    for pattern in ("scene_result.pkl", "*.pkl.z"):
+        if next(path.rglob(pattern), None) is not None:
+            return True
+    return False
+
+
+def _find_release_parquet(path: Path) -> Path | None:
+    """Pick the parquet file to use from the top level of ``path``.
+
+    ``current.parquet`` wins when it exists. Otherwise the ``*.parquet`` entry
+    whose file name sorts first case-insensitively is returned, or ``None``
+    when there is no parquet file. Subdirectories are not searched.
+    """
+    preferred = path / "current.parquet"
+    if preferred.exists():
+        return preferred
+    candidates = sorted(path.glob("*.parquet"), key=lambda entry: entry.name.lower())
+    return candidates[0] if candidates else None
+
+
+def _build_release_analysis_artifacts(
+    *,
+    task_id: str,
+    project_id: str,
+    job_id: str,
+    role: str,
+    output_path: Path,
+    phase: str,
+    run_eval: bool = False,
+    skip_large_file: bool = _RELEASE_SKIP_LARGE_FILE,
+    large_file_mb: float = _RELEASE_LARGE_FILE_MB,
+    progress_start: float = 48.0,
+    progress_end: float = 78.0,
+) -> Dict[str, Any]:
+    """Create the normal app analysis files for a release job.
+
+    Runs up to three stages inside ``output_path`` and reports progress within
+    ``[progress_start, progress_end]``: download takes the first 55% of that
+    window, eval runs up to 90%, and parquet generation fills the remainder.
+
+    1. Download: skipped when a parquet file or downloaded case artifacts are
+       already present; otherwise case archives are fetched through
+       ``download_core.run_download_results``.
+    2. Eval: only when ``run_eval`` is true, the eval-summary module could be
+       imported and no parquet existed on entry. Existing ``Summary.csv`` /
+       ``Score.csv`` short-circuit the run.
+    3. Parquet: an existing parquet is reused, otherwise one is generated from
+       the downloaded archives. A generation failure is recorded as a warning
+       and does not raise.
+
+    Args:
+        task_id: Task that receives log lines and progress updates.
+        project_id: Passed to the download and parquet steps.
+        job_id: Evaluator job to download from; may be empty when local
+            artifacts already exist.
+        role: Prefix used in every log/progress message.
+        output_path: Directory holding (or receiving) the artifacts.
+        phase: Forwarded to the download step.
+        run_eval: Whether the eval stage may run.
+        skip_large_file: OR-ed with ``_RELEASE_SKIP_LARGE_FILE``, so the module
+            setting can force skipping but a caller cannot disable it.
+        large_file_mb: Size threshold; a falsy value falls back to
+            ``_RELEASE_LARGE_FILE_MB``.
+        progress_start: Progress percentage at which this call starts.
+        progress_end: Progress percentage at which this call ends.
+
+    Returns:
+        A dict with keys ``path``, ``download``, ``eval``, ``parquet_path`` and
+        ``warnings``.
+
+    Raises:
+        RuntimeError: No local artifacts exist and ``job_id`` is empty, or the
+            download finished without a single successful case.
+    """
+    from lib import download_core
+
+    eval_summary = _import_eval_summary()
+    pkl_archive_to_parquet = _import_catalog_io()
+    output_path.mkdir(parents=True, exist_ok=True)
+    result: Dict[str, Any] = {
+        "path": str(output_path),
+        "download": {},
+        "eval": {},
+        "parquet_path": "",
+        "warnings": [],
+    }
+    effective_skip_large_file = _RELEASE_SKIP_LARGE_FILE or bool(skip_large_file)
+    effective_large_file_mb = float(large_file_mb or _RELEASE_LARGE_FILE_MB)
+
+    # Split the caller's progress window between the three stages.
+    progress_span = max(0.0, progress_end - progress_start)
+    download_end = progress_start + progress_span * 0.55
+    eval_end = progress_start + progress_span * 0.90
+    # Upper bound for the parquet stage. Derived from the clamped span so it can
+    # never fall below ``eval_end`` even if the caller passes an inverted window.
+    parquet_end = progress_start + progress_span
+    existing_parquet = _find_release_parquet(output_path)
+
+    def _on_progress(msg: str) -> None:
+        append_task_log(task_id, f"{role}: {msg}")
+        progress_msg = f"{role}: {msg}"
+        pct = progress_start
+        # Map "Downloading i / n" messages onto the download share of the window.
+        match = re.search(r"Downloading\s+(\d+)\s*/\s*(\d+)", msg)
+        if match:
+            current = int(match.group(1))
+            total = max(1, int(match.group(2)))
+            pct = progress_start + ((current - 1) / total) * max(0.0, download_end - progress_start)
+        elif "Extracting" in msg or "Organizing" in msg:
+            pct = download_end
+        update_task_progress(task_id, message=progress_msg, pct=min(download_end, pct))
+
+    def _on_warning(msg: str) -> None:
+        result["warnings"].append(msg)
+        append_task_log(task_id, f"WARNING: {role}: {msg}")
+
+    if existing_parquet or _has_release_download_artifacts(output_path):
+        append_task_log(task_id, f"{role}: using existing downloaded artifacts in {output_path}")
+        update_task_progress(task_id, message=f"{role}: using existing downloaded artifacts", pct=download_end)
+        # Nothing was downloaded in this call, so the counters stay at zero.
+        failure_count = 0
+        total_attempted = 0
+        success_count = 0
+        rows: list[dict[str, Any]] = []
+    else:
+        if not job_id:
+            raise RuntimeError(f"{role}: no local artifacts found and no evaluator job id is available for download.")
+        update_task_progress(task_id, message=f"{role}: finding downloadable case logs", pct=progress_start)
+        failure_count, total_attempted, rows = download_core.run_download_results(
+            project_id=project_id,
+            job_id=job_id,
+            suite_id=None,
+            output_path=str(output_path),
+            download_type="archives",
+            phase=phase,
+            skip_large_file=effective_skip_large_file,
+            large_file_mb=effective_large_file_mb,
+            keep_zip_files=False,
+            suite_ids=None,
+            on_progress=_on_progress,
+            on_warning=_on_warning,
+        )
+        success_count = total_attempted - failure_count
+        if success_count <= 0:
+            raise RuntimeError(f"{role}: download produced no successful case artifacts.")
+    result["download"] = {
+        "total": total_attempted,
+        "success": success_count,
+        "failed": failure_count,
+        "skip_large_file": effective_skip_large_file,
+        "large_file_mb": effective_large_file_mb,
+        # Only the first 100 rows are kept in the summary.
+        "rows": rows[:100],
+    }
+
+    if run_eval and eval_summary and not existing_parquet:
+        target_dirs = eval_summary.find_eval_result_dirs(str(output_path), recursive=True)
+        total = len(target_dirs)
+        summary_csv = output_path / "Summary.csv"
+        score_csv = output_path / "Score.csv"
+        if target_dirs and summary_csv.exists() and score_csv.exists():
+            append_task_log(task_id, f"{role}: Summary.csv / Score.csv already exist; skipping eval")
+            update_task_progress(task_id, message=f"{role}: existing Summary.csv / Score.csv found", pct=eval_end)
+            statuses = []
+            result["eval"] = {
+                "directories_processed": total,
+                "success": 0,
+                "failed": 0,
+                "skipped": total,
+                "summary_path": str(summary_csv),
+                "summary_rows": 0,
+                "score_rows": 0,
+            }
+        elif target_dirs:
+            statuses = _run_eval_result_dirs(
+                task_id=task_id,
+                eval_summary=eval_summary,
+                target_dirs=target_dirs,
+                overwrite=False,
+                eval_workers=_eval_worker_count({}, total),
+                pct_start=download_end,
+                pct_end=eval_end,
+                label=f"{role}: eval_result",
+            )
+        else:
+            update_task_progress(task_id, message=f"{role}: no eval_result directories found", pct=eval_end)
+            statuses = []
+        # ``result["eval"]`` is already filled when the existing CSVs were reused.
+        if target_dirs and not result["eval"]:
+            update_task_progress(task_id, message=f"{role}: generating Summary.csv / Score.csv", pct=eval_end)
+            csv_info = eval_summary.generate_summary_and_score_csv(str(output_path))
+            result["eval"] = {
+                "directories_processed": len(target_dirs),
+                "success": sum(1 for item in statuses if item.get("status") == "success"),
+                "failed": sum(1 for item in statuses if item.get("status") == "failed"),
+                "skipped": sum(1 for item in statuses if item.get("status") == "skipped"),
+                "summary_path": csv_info.get("summary_path", ""),
+                "summary_rows": csv_info.get("summary_rows", 0),
+                "score_rows": csv_info.get("score_rows", 0),
+            }
+        elif not result["eval"]:
+            result["eval"] = {
+                "directories_processed": 0,
+                "success": 0,
+                "failed": 0,
+                "skipped": 0,
+            }
+    elif not run_eval:
+        append_task_log(task_id, f"{role}: skipping eval; parquet is sufficient for release PDF generation")
+        result["eval"] = {"enabled": False, "reason": "release_pdf_uses_parquet"}
+    elif existing_parquet:
+        append_task_log(task_id, f"{role}: skipping eval because parquet already exists")
+        result["eval"] = {"enabled": False, "reason": "existing_parquet"}
+
+    # Look again: the parquet check is repeated after the download/eval stages.
+    existing_parquet = _find_release_parquet(output_path)
+    if existing_parquet:
+        append_task_log(task_id, f"{role}: existing parquet found: {existing_parquet}")
+        result["parquet_path"] = str(existing_parquet)
+        update_task_progress(task_id, message=f"{role}: existing parquet found", pct=progress_end)
+    elif pkl_archive_to_parquet:
+        try:
+            update_task_progress(task_id, message=f"{role}: generating parquet", pct=eval_end)
+            # Stay inside the caller's window: the caller may run this function
+            # once per role, so reporting a fixed 99% here would make overall
+            # progress jump forward and then fall back for the next role.
+            result["parquet_path"] = pkl_archive_to_parquet(
+                str(output_path),
+                on_progress=_parquet_progress_callback(
+                    task_id,
+                    prefix=f"{role}: parquet",
+                    pct_start=eval_end,
+                    pct_end=parquet_end,
+                ),
+                on_skip=lambda path, reason: append_task_log(
+                    task_id,
+                    f"WARNING: {role}: parquet skipped {path}: {reason}",
+                ),
+                project_id=project_id,
+                job_id=job_id,
+            ) or ""
+            update_task_progress(task_id, message=f"{role}: parquet generated", pct=parquet_end)
+        except Exception as exc:
+            # Best effort: a parquet failure is reported but does not abort the job.
+            warning = f"Parquet generation failed: {exc}"
+            result["warnings"].append(warning)
+            append_task_log(task_id, f"WARNING: {role}: {warning}")
+
+    append_task_log(
+        task_id,
+        (
+            f"{role}: analysis artifacts ready at {output_path} "
+            f"({success_count}/{total_attempted} downloads)"
+        ),
+    )
+    return result
+
+
+def job_run_release_specsheet_workflow(task_id: str, parameters: Dict[str, Any]) -> None:
+    """Schedule the standard release evaluator jobs, process them as app-native runs, then build a release specsheet.
+
+    Stages, in order:
+
+    1. Read and validate ``parameters`` (``project_id``, ``target_name``,
+       ``output_path`` and a version are required; ``trend_metadata`` must
+       carry the ``trend`` tag).
+    2. Write ``metadata.yaml`` under the release root and set the
+       ``AUTH_PROFILE`` / ``EVALUATOR_ENVIRONMENT`` environment variables.
+    3. For the performance and devops roles (plus planning_test when
+       ``optional_catalog_enabled`` is set): reuse a supplied job id, reuse
+       local artifacts, or schedule a new evaluator job.
+    4. Wait for each evaluator job in turn and fetch its suite summary.
+    5. Build the analysis artifacts per role and record a completed run entry
+       for each of them.
+    6. Resolve the specsheet topic, write the DevOps trend summary and
+       generate the specsheet PDF.
+
+    Task status, progress, log lines and the result summary are updated along
+    the way. On any exception the task and run are marked failed and the
+    exception is re-raised.
+    """
+    update_task_status(task_id, "running")
+    append_task_log(task_id, "Starting release specsheet workflow")
+    _mark_run_status(
+        task_id,
+        parameters,
+        task_type="run_release_specsheet_workflow",
+        status="running",
+        create_missing=True,
+    )
+    try:
+        from lib import evaluator_api
+        from lib.specsheet_report import (
+            DEFAULT_SPECSHEET_LABELS,
+            DEFAULT_SPECSHEET_TOPIC,
+            generate_specsheet_pdf,
+            resolve_specsheet_topic_name,
+        )
+
+        # --- Stage 1: read parameters, falling back to trend metadata/defaults ---
+        project_id = str(parameters.get("project_id") or "").strip()
+        target_name = str(parameters.get("target_name") or "").strip()
+        output_path = str(parameters.get("output_path") or "").strip()
+        environment = str(parameters.get("environment") or "default").strip() or "default"
+        is_tag = bool(parameters.get("is_tag", False))
+        metadata = parameters.get("trend_metadata") if isinstance(parameters.get("trend_metadata"), dict) else {}
+        version = str(parameters.get("version") or metadata.get("pilot_auto_version") or "").strip()
+        topic = str(parameters.get("topic") or metadata.get("topic_name") or DEFAULT_SPECSHEET_TOPIC).strip()
+        description = str(parameters.get("description") or target_name or "").strip()
+        poll_interval = float(parameters.get("poll_interval", 60.0))
+        # Default maximum wait is one week, expressed in seconds.
+        max_wait_seconds = float(parameters.get("max_wait_seconds", 3600.0 * 24 * 7))
+        analysis_phase = str(
+            parameters.get("analysis_phase")
+            or "perception.object_recognition.tracking.objects"
+        ).strip()
+        # NOTE(review): taken from the module constant only; a "skip_large_file"
+        # entry in ``parameters`` is not read here.
+        skip_large_file = _RELEASE_SKIP_LARGE_FILE
+        large_file_mb = float(parameters.get("large_file_mb") or _RELEASE_LARGE_FILE_MB)
+        labels = parameters.get("labels") or DEFAULT_SPECSHEET_LABELS
+        # Normalise labels to non-empty stripped strings; fall back to the defaults.
+        labels = [str(label).strip() for label in labels if str(label).strip()]
+        if not labels:
+            labels = list(DEFAULT_SPECSHEET_LABELS)
+
+        if not project_id or not target_name or not output_path or not version:
+            raise ValueError("Missing project_id, target_name, output_path, or Pilot.Auto version.")
+        if "trend" not in [str(tag).strip() for tag in metadata.get("tags", [])]:
+            raise ValueError("Release metadata must include tags: [trend].")
+
+        # --- Stage 2: prepare the release directory layout and environment ---
+        release_root = Path(output_path)
+        release_root.mkdir(parents=True, exist_ok=True)
+        _write_release_metadata_file(release_root / "metadata.yaml", metadata)
+        performance_path = release_root / "performance"
+        devops_path = release_root / "devops"
+        # One sub-directory per role under the release root.
+        role_paths = {
+            "performance": performance_path,
+            "devops": devops_path,
+            "planning_test": release_root / "planning_test",
+        }
+        # Process-wide side effect: both variables are set for the whole process.
+        os.environ["AUTH_PROFILE"] = environment
+        os.environ["EVALUATOR_ENVIRONMENT"] = environment
+
+        api = evaluator_api.EvaluationRunAPI()
+        optional_catalog_enabled = bool(parameters.get("optional_catalog_enabled", False))
+        optional_catalog_id = str(
+            parameters.get("optional_catalog_id") or _RELEASE_OPTIONAL_CATALOG_ID
+        ).strip()
+        optional_integration_id = str(parameters.get("optional_integration_id") or "").strip()
+        # Look up the integration only when the optional catalog is enabled and
+        # no integration id was supplied.
+        if optional_catalog_enabled and optional_catalog_id and not optional_integration_id:
+            append_task_log(task_id, f"Resolving Planning Test catalog integration: {optional_catalog_id}")
+            optional_integration_id = _resolve_active_integration_id(api, project_id, optional_catalog_id)
+        # --- Stage 3: describe the evaluator jobs this release needs ---
+        jobs = [
+            {
+                "role": "performance",
+                "label": "Performance Test",
+                "catalog_id": str(parameters.get("performance_catalog_id") or _RELEASE_PERFORMANCE_CATALOG_ID),
+                "integration_id": str(parameters.get("performance_integration_id") or _RELEASE_PERFORMANCE_INTEGRATION_ID),
+                "job_id": str(parameters.get("performance_job_id") or "").strip(),
+            },
+            {
+                "role": "devops",
+                "label": "Devops Test",
+                "catalog_id": str(parameters.get("devops_catalog_id") or _RELEASE_DEVOPS_CATALOG_ID),
+                "integration_id": str(parameters.get("devops_integration_id") or _RELEASE_DEVOPS_INTEGRATION_ID),
+                "job_id": str(parameters.get("devops_job_id") or "").strip(),
+            },
+        ]
+        if optional_catalog_enabled:
+            jobs.append(
+                {
+                    "role": "planning_test",
+                    "label": "Planning Test",
+                    "catalog_id": optional_catalog_id,
+                    "integration_id": optional_integration_id,
+                    "job_id": str(parameters.get("optional_job_id") or "").strip(),
+                }
+            )
+        # Result summary, republished after every state change below.
+        summary: Dict[str, Any] = {
+            "job": "run_release_specsheet_workflow",
+            "release_root": str(release_root),
+            "version": version,
+            "topic": topic,
+            "evaluator_jobs": {},
+            "analysis_artifacts": {},
+            "specsheet_pdf": "",
+        }
+        update_task_result_summary(task_id, summary)
+        update_task_progress(task_id, message="Preparing release evaluator jobs", pct=2)
+
+        # Per job: a supplied job id wins, then local artifacts, otherwise a new
+        # evaluator job is scheduled.
+        for item in jobs:
+            schedule_description = f"{description} | {item['label']}"
+            item["description"] = schedule_description
+            role = str(item["role"])
+            local_path = role_paths[role]
+            local_ready = _find_release_parquet(local_path) is not None or _has_release_download_artifacts(local_path)
+            item["local_artifacts_ready"] = local_ready
+            job_id = str(item.get("job_id") or "").strip()
+            if job_id:
+                append_task_log(task_id, f"Using existing {item['label']}: {job_id}")
+                status = "existing"
+            elif local_ready:
+                append_task_log(task_id, f"Using existing local artifacts for {item['label']}: {local_path}")
+                status = "local_artifacts"
+            else:
+                append_task_log(task_id, f"Scheduling {item['label']}: catalog={item['catalog_id']}")
+                result = api.schedule_job(
+                    project_id=project_id,
+                    catalog_id=item["catalog_id"],
+                    integration_id=item["integration_id"],
+                    target_name=target_name,
+                    suite_ids=None,
+                    max_retries=0,
+                    description=schedule_description,
+                    clean_build=True,
+                    debug=False,
+                    release=False,
+                    record_caret=False,
+                    log_expiration_time_in_days=10.0,
+                    is_tag=is_tag,
+                )
+                job_id = str(result.get("job_id") or "").strip()
+                if not job_id:
+                    raise RuntimeError(f"No job_id returned for {item['label']}.")
+                item["job_id"] = job_id
+                status = "scheduled"
+            # No report URL when the role runs purely from local artifacts.
+            report_url = evaluator_api.get_job_report_url(project_id, job_id) if job_id else ""
+            summary["evaluator_jobs"][item["role"]] = {
+                "job_id": job_id,
+                "report_url": report_url,
+                "catalog_id": item["catalog_id"],
+                "integration_id": item["integration_id"],
+                "status": status,
+                "description": schedule_description,
+            }
+            if status == "scheduled":
+                append_task_log(task_id, f"Scheduled {item['label']}: {job_id}")
+            update_task_result_summary(task_id, summary)
+
+        # --- Stage 4: wait for each job in turn; 40 progress points are shared
+        # evenly between the jobs, starting at 5% ---
+        wait_span = 40.0 / max(1, len(jobs))
+        for idx, item in enumerate(jobs, start=1):
+            job_id = str(item["job_id"])
+            label = str(item["label"])
+            base_pct = 5 + (idx - 1) * wait_span
+            if not job_id and item.get("local_artifacts_ready"):
+                append_task_log(task_id, f"Skipping evaluator wait for {label}; local artifacts already exist.")
+                summary["evaluator_jobs"][item["role"]]["status"] = "local_artifacts"
+                update_task_progress(task_id, message=f"{label}: using local artifacts", pct=base_pct + wait_span - 2.0)
+                update_task_result_summary(task_id, summary)
+                continue
+
+            # ``role`` and ``pct_base`` are bound as keyword defaults at definition
+            # time; ``label`` is read from the enclosing loop variable. The
+            # callback is only handed to the wait call made in this iteration.
+            def _on_check(status: str, elapsed: float, *, role: str = str(item["role"]), pct_base: float = base_pct) -> None:
+                # Progress grows with elapsed time and is capped 2 points below
+                # the end of this job's share.
+                pct = min(pct_base + (elapsed / max_wait_seconds) * max(2.0, wait_span - 2.0), pct_base + wait_span - 2.0)
+                summary["evaluator_jobs"][role]["status"] = status
+                update_task_progress(
+                    task_id,
+                    message=f"{label}: {status} ({elapsed / 3600:.1f}h elapsed)",
+                    pct=pct,
+                )
+                update_task_result_summary(task_id, summary)
+
+            append_task_log(task_id, f"Waiting for {label}: {job_id}")
+            final_report = api.wait_for_job_completion(
+                project_id=project_id,
+                job_id=job_id,
+                poll_interval=poll_interval,
+                max_wait_seconds=max_wait_seconds,
+                on_check=_on_check,
+            )
+            status = evaluator_api.extract_job_status(final_report)
+            summary["evaluator_jobs"][item["role"]]["status"] = status
+            append_task_log(task_id, f"{label} completed with status: {status}")
+            # The suite summary is optional: a fetch failure is logged and the
+            # workflow continues with no rows.
+            try:
+                suite_rows = api.get_suite_summary(project_id, job_id, use_available_case_results=True)
+            except Exception as exc:
+                append_task_log(task_id, f"WARNING: Could not fetch suite summary for {label}: {exc}")
+                suite_rows = []
+            item["suite_rows"] = suite_rows
+            summary["evaluator_jobs"][item["role"]]["suite_count"] = len(suite_rows)
+            update_task_result_summary(task_id, summary)
+
+        # --- Stage 5: build analysis artifacts; 30 progress points starting at
+        # 48% are shared evenly between the jobs ---
+        update_task_progress(task_id, message="Building normal CSV/parquet analysis artifacts", pct=48)
+        artifact_span = 30.0 / max(1, len(jobs))
+        for artifact_idx, item in enumerate(jobs):
+            role = str(item["role"])
+            analysis_path = role_paths[role]
+            artifact_summary = _build_release_analysis_artifacts(
+                task_id=task_id,
+                project_id=project_id,
+                job_id=str(item["job_id"]),
+                role=role,
+                output_path=analysis_path,
+                phase=analysis_phase,
+                run_eval=bool(parameters.get("run_eval", False)),
+                skip_large_file=skip_large_file,
+                large_file_mb=large_file_mb,
+                progress_start=48 + (artifact_span * artifact_idx),
+                progress_end=48 + (artifact_span * (artifact_idx + 1)),
+            )
+            summary["analysis_artifacts"][role] = artifact_summary
+            update_task_result_summary(task_id, summary)
+
+            # Record this role as its own completed run: the workflow parameters
+            # with the role-specific path, ids and options layered on top.
+            child_params = {
+                **parameters,
+                "output_path": str(analysis_path),
+                "catalog_id": item["catalog_id"],
+                "integration_id": item["integration_id"],
+                "job_id": item["job_id"],
+                "download_type": "archives",
+                "phase": analysis_phase,
+                "skip_large_file": skip_large_file,
+                "large_file_mb": large_file_mb,
+                "run_eval": bool(parameters.get("run_eval", False)),
+                "generate_parquet": True,
+                "eval_recursive": bool(parameters.get("run_eval", False)),
+            }
+            _mark_run_status(
+                task_id,
+                child_params,
+                task_type="run_release_specsheet_workflow",
+                status="completed",
+                result_path=str(analysis_path),
+                create_missing=True,
+                extra={
+                    "release_specsheet": {
+                        "root": str(release_root),
+                        "role": role,
+                        "metadata": metadata,
+                    },
+                    "evaluator": {
+                        "job_id": str(item["job_id"]),
+                        "report_url": summary["evaluator_jobs"][role].get("report_url", ""),
+                        "status": summary["evaluator_jobs"][role].get("status", ""),
+                        "catalog_id": item["catalog_id"],
+                        "integration_id": item["integration_id"],
+                        "target_name": target_name,
+                        "description": str(item.get("description") or ""),
+                        "title": str(item.get("description") or ""),
+                    },
+                    "download": {
+                        **artifact_summary.get("download", {}),
+                        "mode": "release_specsheet",
+                        "download_type": "archives",
+                        "phase": analysis_phase,
+                    },
+                    "evaluation": {
+                        **artifact_summary.get("eval", {}),
+                        "enabled": bool(parameters.get("run_eval", False)),
+                        "recursive": bool(parameters.get("run_eval", False)),
+                    },
+                    "parquet": {
+                        "enabled": True,
+                        "path": artifact_summary.get("parquet_path", ""),
+                    },
+                },
+            )
+
+        # --- Stage 6: resolve the topic against the performance data ---
+        detected_topic, detected_topics = resolve_specsheet_topic_name(
+            performance_path,
+            topic,
+            fallback_topic=DEFAULT_SPECSHEET_TOPIC,
+        )
+        if detected_topic != topic:
+            append_task_log(
+                task_id,
+                (
+                    f"Using detected specsheet topic {detected_topic} instead of requested topic {topic} "
+                    f"(detected: {', '.join(detected_topics) if detected_topics else 'none'})"
+                ),
+            )
+            topic = detected_topic
+            summary["topic"] = topic
+            summary["detected_topics"] = detected_topics
+            update_task_result_summary(task_id, summary)
+
+        update_task_progress(task_id, message="Writing release trend summaries", pct=78)
+        write_trend_metadata(devops_path, metadata)
+        devops_job = next(item for item in jobs if item["role"] == "devops")
+        devops_summary_target = devops_path / "resources" / "summary.json"
+        devops_suite_rows = list(devops_job.get("suite_rows") or [])
+        # Without fresh suite rows, try to recover them from a summary file
+        # already present at the target path.
+        if not devops_suite_rows:
+            existing_suite_rows = _suite_rows_from_existing_devops_summary(devops_summary_target)
+            if existing_suite_rows:
+                append_task_log(task_id, "Rebuilding DevOps trend summary from existing suite pass-rate rows.")
+                devops_suite_rows = existing_suite_rows
+        devops_summary_path = _write_devops_trend_summary(devops_summary_target, devops_suite_rows)
+        if devops_summary_path is None and devops_summary_target.exists():
+            devops_summary_path = devops_summary_target
+            append_task_log(task_id, f"Using existing DevOps trend summary: {devops_summary_path}")
+        # NOTE(review): when the existing file is reused above, the ``else``
+        # branch below still logs "DevOps trend summary written".
+        if devops_summary_path is None:
+            append_task_log(task_id, "WARNING: DevOps trend summary had no suite pass-rate rows.")
+        else:
+            append_task_log(task_id, f"DevOps trend summary written: {devops_summary_path}")
+
+        update_task_progress(task_id, message="Generating app-native release specsheet", pct=82)
+        specsheet_pdf, generated = generate_specsheet_pdf(
+            performance_path,
+            project_id=project_id,
+            version=version,
+            labels=labels,
+            topic_name=topic,
+            include_trend=True,
+            trend_metadata=metadata,
+            force=bool(parameters.get("overwrite", True)),
+            progress_callback=lambda msg: append_task_log(task_id, f"specsheet: {msg}"),
+        )
+        summary["specsheet_pdf"] = str(specsheet_pdf)
+        summary["specsheet_generated"] = bool(generated)
+
+        update_task_progress(task_id, message="Release specsheet ready", pct=100)
+        update_task_result_summary(task_id, summary)
+        _mark_run_status(
+            task_id,
+            parameters,
+            task_type="run_release_specsheet_workflow",
+            status="completed",
+            result_path=str(specsheet_pdf),
+            extra={
+                "release_specsheet": {
+                    "root": str(release_root),
+                    "specsheet_pdf": str(specsheet_pdf),
+                    "evaluator_jobs": summary["evaluator_jobs"],
+                    "analysis_artifacts": summary["analysis_artifacts"],
+                    "metadata": metadata,
+                }
+            },
+        )
+        append_task_log(task_id, f"Release specsheet PDF ready: {specsheet_pdf}")
+        update_task_status(task_id, "completed", result_path=str(specsheet_pdf))
+    except Exception as e:
+        # Record the failure on both the run record and the task, then re-raise.
+        append_task_log(task_id, f"Failed: {e}")
+        _mark_run_status(
+            task_id,
+            parameters,
+            task_type="run_release_specsheet_workflow",
+            status="failed",
+            error_message=str(e),
+            create_missing=True,
+        )
         update_task_status(task_id, "failed", error_message=str(e))
+        raise
+
+
+def job_run_evaluator_and_process(task_id: str, parameters: Dict[str, Any]) -> None:
+    """
+    Full combined workflow: Run Evaluator + Download + Eval + Parquet.
+    
+    Steps:
+    1. Schedule evaluator job (get job_id)
+    2. Poll until evaluator completes
+    3. Download results
+    4. Run eval
+    5. Generate parquet
+    """
+    update_task_status(task_id, "running")
+    append_task_log(task_id, "Starting run_evaluator_and_process workflow")
+    _mark_run_status(
+        task_id,
+        parameters,
+        task_type="run_evaluator_and_process",
+        status="running",
+        create_missing=True,
+    )
+    
+    try:
+        from lib import evaluator_api
+        from lib import download_core
+        
+        # Import eval_summary
+        eval_summary = _import_eval_summary()
+        pkl_archive_to_parquet = _import_catalog_io()
+        
+        # Extract parameters
+        project_id = parameters.get("project_id")
+        catalog_id = parameters.get("catalog_id")
+        integration_id = parameters.get("integration_id")
+        source_job_id = parameters.get("source_job_id")
+        suite_ids = parameters.get("suite_ids")
+        target_name = parameters.get("target_name")  # branch name or tag
+        description = parameters.get("description", "no description")
+        output_path = parameters.get("output_path")
+        trend_metadata = parameters.get("trend_metadata") if isinstance(parameters.get("trend_metadata"), dict) else None
+        trend_role = str(parameters.get("trend_role") or "").strip()
+
+        def _write_devops_trend_summary_from_suites(rows: list[dict[str, Any]]) -> None:
+            if not output_path:
+                return
+            summary_payload: Dict[str, Any] = {"DevOps": {"Suite pass rate": {}}}
+            for row in rows or []:
+                suite_name = str(row.get("name") or row.get("suite_name") or row.get("simulation") or "suite").strip()
+                total = int(row.get("all", 0) or row.get("total", 0) or 0)
+                passed = int(row.get("success", 0) or row.get("passed", 0) or 0)
+                if total <= 0:
+                    failed = int(row.get("fail", 0) or row.get("failed", 0) or 0)
+                    canceled = int(row.get("cancel", 0) or row.get("canceled", 0) or 0)
+                    total = passed + failed + canceled
+                if total <= 0:
+                    continue
+                summary_payload["DevOps"]["Suite pass rate"][suite_name] = {
+                    "passed": passed,
+                    "total": total,
+                }
+            if not summary_payload["DevOps"]["Suite pass rate"]:
+                return
+            resource_dir = Path(output_path) / "resources"
+            resource_dir.mkdir(parents=True, exist_ok=True)
+            with (resource_dir / "summary.json").open("w", encoding="utf-8") as fh:
+                json.dump(summary_payload, fh, ensure_ascii=False, indent=2)
+        
+        # Eval options
+        run_eval = parameters.get("run_eval", True)
+        generate_parquet = parameters.get("generate_parquet", True)
+        eval_recursive = parameters.get("eval_recursive", True)
+        eval_overwrite = parameters.get("eval_overwrite", False)
+        
+        # Download options
+        download_type = parameters.get("download_type", "archives")
+        phase = parameters.get("phase", "perception.object_recognition.tracking.objects")
+        skip_large_file = parameters.get("skip_large_file", False)
+        large_file_mb = float(parameters.get("large_file_mb", 50.0))
+        keep_zip_files = parameters.get("keep_zip_files", False)
+        
+        # Evaluator polling options
+        poll_interval = float(parameters.get("poll_interval", 60.0))
+        max_wait_seconds = float(parameters.get("max_wait_seconds", 3600.0 * 24 * 7))  # 1 week default
+        download_ready_timeout = float(parameters.get("download_ready_timeout", 1800.0))
+        download_ready_poll_interval = float(
+            parameters.get("download_ready_poll_interval", min(max(poll_interval, 10.0), 60.0))
+        )
+        
+        # Scheduling options
+        max_retries = parameters.get("max_retries", 1)
+        clean_build = parameters.get("clean_build", False)
+        debug = parameters.get("debug", False)
+        is_tag = parameters.get("is_tag", False)
+        release = bool(parameters.get("release", False))
+        record_caret = bool(parameters.get("record_caret", False))
+        log_expiration_time_in_days = float(parameters.get("log_expiration_time_in_days", 14.0))
+
+        has_source_job = bool(source_job_id)
+        has_fresh_source = bool(integration_id and target_name)
+        if not project_id or not catalog_id or not output_path or (not has_source_job and not has_fresh_source):
+            _mark_run_status(
+                task_id,
+                parameters,
+                task_type="run_evaluator_and_process",
+                status="failed",
+                error_message="Missing required parameters",
+                create_missing=True,
+            )
+            update_task_status(task_id, "failed", error_message="Missing required parameters")
+            return
+        
+        environment = parameters.get("environment", "default")
+        os.environ["AUTH_PROFILE"] = environment
+        os.environ["EVALUATOR_ENVIRONMENT"] = environment
+        
+        def on_progress(msg: str) -> None:  # Progress sink: task log, then run event, then task progress message.
+            append_task_log(task_id, msg)
+            _append_run_event(task_id, parameters, task_type="run_evaluator_and_process", message=msg)
+            update_task_progress(task_id, message=msg)  # message only; no pct is passed from here
+        
+        def on_warning(msg: str) -> None:  # Warning sink: logs and records msg with a "WARNING: " prefix.
+            append_task_log(task_id, f"WARNING: {msg}")
+            _append_run_event(task_id, parameters, task_type="run_evaluator_and_process", message=f"WARNING: {msg}")  # unlike on_progress, update_task_progress is not called
+        
+        # Step 1: Schedule evaluator job
+        on_progress("Step 1/5: Scheduling evaluator job...")
+        if source_job_id:
+            append_task_log(
+                task_id,
+                f"Project: {project_id}, Catalog: {catalog_id}, Reuse build from job: {source_job_id}",
+            )
+        else:
+            append_task_log(task_id, f"Project: {project_id}, Catalog: {catalog_id}, Target: {target_name}")
+        
+        try:
+            api = evaluator_api.EvaluationRunAPI()
+            
+            result = api.schedule_job(
+                project_id=project_id,
+                catalog_id=catalog_id,
+                integration_id=integration_id,
+                target_name=target_name,
+                source_job_id=source_job_id,
+                suite_ids=suite_ids,
+                max_retries=max_retries,
+                description=description,
+                clean_build=clean_build,
+                debug=debug,
+                release=release,
+                record_caret=record_caret,
+                log_expiration_time_in_days=log_expiration_time_in_days,
+                is_tag=is_tag,
+            )
+        except Exception as e:
+            _mark_run_status(
+                task_id,
+                parameters,
+                task_type="run_evaluator_and_process",
+                status="failed",
+                error_message=f"Failed to schedule evaluator job: {e}",
+                create_missing=True,
+            )
+            update_task_status(task_id, "failed", error_message=f"Failed to schedule evaluator job: {e}")
+            return
+        
+        job_id = result.get("job_id")
+        if not job_id:
+            _mark_run_status(
+                task_id,
+                parameters,
+                task_type="run_evaluator_and_process",
+                status="failed",
+                error_message="No job_id returned from evaluator API",
+                create_missing=True,
+            )
+            update_task_status(task_id, "failed", error_message="No job_id returned from evaluator API")
+            return
+        
+        report_url = evaluator_api.get_job_report_url(project_id, job_id)
+        append_task_log(task_id, f"Scheduled evaluator job: {job_id}")
+        append_task_log(task_id, f"Report URL: {report_url}")
+        update_task_progress(task_id, message=f"Evaluator job scheduled: {job_id}", pct=5)
+        summary = {
+            "job": "run_evaluator_and_process",
+            "evaluator_job_id": job_id,
+            "evaluator_report_url": report_url,
+            "evaluator_status": "scheduled",
+            "source_job_id": source_job_id or "",
+            "download_summary": {"total": 0, "success": 0, "failed": 0},
+            "eval_summary": {},
+            "parquet_path": "",
+        }
+        update_task_result_summary(task_id, summary)
+        _update_run_metadata(
+            task_id,
+            parameters,
+            task_type="run_evaluator_and_process",
+            create_missing=True,
+            extra={
+                "evaluator": {
+                    "job_id": job_id,
+                    "report_url": report_url,
+                    "status": "scheduled",
+                    "catalog_id": catalog_id,
+                    "integration_id": integration_id or "",
+                    "source_job_id": source_job_id or "",
+                    "target_name": target_name or "",
+                    "description": description or "",
+                    "is_tag": bool(is_tag),
+                    "title": description or "",
+                }
+            },
+        )
+
+        if trend_metadata:
+            try:
+                write_trend_metadata(output_path, trend_metadata)
+                append_task_log(task_id, "Saved release trend metadata.")
+                _update_run_metadata(
+                    task_id,
+                    parameters,
+                    task_type="run_evaluator_and_process",
+                    extra={
+                        "trend": {
+                            "enabled": True,
+                            "role": trend_role,
+                            "metadata": trend_metadata,
+                        }
+                    },
+                )
+            except Exception as e:
+                append_task_log(task_id, f"WARNING: Could not save release trend metadata: {e}")
+        
+        # Step 2: Poll for evaluator completion
+        on_progress("Step 2/5: Waiting for evaluator to complete...")
+        append_task_log(task_id, "This may take a while depending on evaluator queue and run time...")
+        last_suite_snapshot = {"key": None, "time": 0.0}
+        
+        def on_eval_progress(status: str, elapsed: float) -> None:
+            """Record one evaluator poll: log status, update progress, refresh suite totals.
+
+            Passed as ``on_check`` to ``api.wait_for_job_completion``. Mutates the
+            enclosing ``summary`` and ``last_suite_snapshot`` dicts, and hands
+            ``summary`` to ``update_task_result_summary`` before every return.
+
+            Args:
+                status: Evaluator job status reported for this poll.
+                elapsed: Time waited so far, treated as seconds (shown as hours).
+            """
+            hours = elapsed / 3600
+            msg = f"Evaluator status: {status} (elapsed: {hours:.1f}h)"
+            append_task_log(task_id, msg)
+            # Progress: 5% to 40% during evaluation wait. A non-positive wait budget
+            # would divide by zero (or go negative), so pin it to the end of the window.
+            if max_wait_seconds > 0:
+                pct = min(5 + (elapsed / max_wait_seconds) * 35, 40)
+            else:
+                pct = 40
+            update_task_progress(
+                task_id,
+                message=f"Evaluator: {status} ({hours:.1f}h elapsed)",
+                pct=pct,
+            )
+            summary["evaluator_status"] = status
+
+            # Fetch suite results during the first minute, then at most every 10 minutes.
+            should_snapshot = elapsed < 60 or (elapsed - last_suite_snapshot["time"]) >= 600
+            if not should_snapshot:
+                update_task_result_summary(task_id, summary)
+                return
+
+            try:
+                suite_rows = api.get_suite_summary(
+                    project_id, job_id, use_available_case_results=True
+                )
+            except Exception:
+                # Best effort: a failed snapshot fetch is swallowed so this callback returns normally.
+                update_task_result_summary(task_id, summary)
+                return
+
+            suite_summary = _summarize_suite_reports(suite_rows)
+            totals = _suite_case_totals(suite_rows)
+            # Fingerprint of the case totals and the per-suite failure counts.
+            snapshot_key = (
+                totals["total"],
+                totals["success"],
+                totals["failed"],
+                totals["canceled"],
+                tuple((row["suite_name"], row["failed"]) for row in suite_summary if row["failed"] > 0),
+            )
+
+            # Only log a snapshot line when the counts changed since the previous one.
+            changed = snapshot_key != last_suite_snapshot["key"]
+            last_suite_snapshot["time"] = elapsed
+            last_suite_snapshot["key"] = snapshot_key
+            summary["evaluator_case_totals"] = totals
+            summary["evaluator_suites"] = suite_summary
+
+            if changed and totals["total"] > 0:
+                snapshot_msg = (
+                    "Evaluator progress snapshot: "
+                    f"{totals['success']}/{totals['total']} success, "
+                    f"{totals['failed']} failed, {totals['canceled']} canceled."
+                )
+                failing = [row for row in suite_summary if row["failed"] > 0]
+                if failing:
+                    top = ", ".join(f"{row['suite_name']}={row['failed']}" for row in failing[:3])
+                    snapshot_msg += f" Failing suites: {top}"
+                append_task_log(task_id, snapshot_msg)
+
+            # Metadata and the result summary are refreshed on every snapshot, changed or not.
+            _update_run_metadata(
+                task_id,
+                parameters,
+                task_type="run_evaluator_and_process",
+                extra={
+                    "evaluator": {
+                        "status": status,
+                        "case_totals": totals,
+                        "suites": suite_summary,
+                    }
+                },
+            )
+            update_task_result_summary(task_id, summary)
+        
+        try:
+            final_report = api.wait_for_job_completion(
+                project_id=project_id,
+                job_id=job_id,
+                poll_interval=poll_interval,
+                max_wait_seconds=max_wait_seconds,
+                on_check=on_eval_progress,
+            )
+        except evaluator_api.EvaluationAPIError as e:
+            append_task_log(task_id, f"Evaluator wait error: {e}")
+            _mark_run_status(
+                task_id,
+                parameters,
+                task_type="run_evaluator_and_process",
+                status="failed",
+                error_message=f"Evaluator failed or timed out: {e}",
+            )
+            update_task_status(task_id, "failed", error_message=f"Evaluator failed or timed out: {e}")
+            return
+        
+        test_status = evaluator_api.extract_job_status(final_report)
+        try:
+            suite_rows = api.get_suite_summary(project_id, job_id, use_available_case_results=True)
+        except Exception as e:
+            append_task_log(task_id, f"Could not fetch suite summary: {e}")
+            suite_rows = []
+        try:
+            case_reports = api.get_case_reports(project_id, job_id)
+        except Exception as e:
+            append_task_log(task_id, f"Could not fetch case reports: {e}")
+            case_reports = []
+
+        if trend_metadata and trend_role == "devops":
+            try:
+                _write_devops_trend_summary_from_suites(suite_rows)
+                append_task_log(task_id, "Saved DevOps trend summary.")
+            except Exception as e:
+                append_task_log(task_id, f"WARNING: Could not save DevOps trend summary: {e}")
+
+        evaluator_summary = _build_evaluator_result_summary(
+            job_id=job_id,
+            report_url=report_url,
+            evaluator_status=test_status,
+            final_report=final_report,
+            suite_rows=suite_rows,
+            failed_cases=case_reports,
+        )
+        summary.update(evaluator_summary)
+        update_task_result_summary(task_id, summary)
+        _update_run_metadata(
+            task_id,
+            parameters,
+            task_type="run_evaluator_and_process",
+            extra={
+                "evaluator": {
+                    "job_id": job_id,
+                    "report_url": report_url,
+                    "status": test_status,
+                    "title": summary.get("evaluator_title", ""),
+                    "scheduled_by": summary.get("evaluator_scheduled_by", ""),
+                    "catalog_id": summary.get("evaluator_catalog_id", ""),
+                    "catalog_name": summary.get("evaluator_catalog_name", ""),
+                    "catalog_version_id": summary.get("evaluator_catalog_version_id", ""),
+                    "catalog_url": summary.get("evaluator_catalog_url", ""),
+                    "target": summary.get("evaluator_target", ""),
+                    "git_sha": summary.get("evaluator_git_sha", ""),
+                    "git_ref_url": summary.get("evaluator_git_ref_url", ""),
+                    "git_commit_url": summary.get("evaluator_git_commit_url", ""),
+                    "source_url": summary.get("evaluator_source_url", ""),
+                    "source_repo_label": summary.get("evaluator_source_repo_label", ""),
+                    "build_status": summary.get("evaluator_build_status", ""),
+                    "test_status": summary.get("evaluator_test_status", ""),
+                    "fail_message": summary.get("evaluator_fail_message", ""),
+                    "case_totals": summary.get("evaluator_case_totals", {}),
+                    "suites": summary.get("evaluator_suites", []),
+                    "failed_cases": summary.get("evaluator_failed_cases", []),
+                }
+            },
+        )
+
+        fail_message = summary.get("evaluator_fail_message", "")
+        if evaluator_api.is_success_job_status(test_status):
+            update_task_progress(task_id, message="Evaluator completed successfully", pct=40)
+            append_task_log(task_id, f"Evaluator completed with status: {test_status}")
+        else:
+            append_task_log(task_id, f"Evaluator completed with non-success status: {test_status}")
+            if fail_message:
+                append_task_log(task_id, f"Evaluator fail message: {fail_message}")
+            case_totals = summary.get("evaluator_case_totals", {})
+            append_task_log(
+                task_id,
+                (
+                    "Evaluator result summary: "
+                    f"{case_totals.get('success', 0)}/{case_totals.get('total', 0)} success, "
+                    f"{case_totals.get('failed', 0)} failed, {case_totals.get('canceled', 0)} canceled"
+                ),
+            )
+            failed_cases = summary.get("evaluator_failed_cases", [])
+            for case in failed_cases[:5]:
+                detail = case.get("fail_message", "") or case.get("status", "")
+                append_task_log(
+                    task_id,
+                    f"Failed case: {case.get('suite_name', '')} / {case.get('scenario_name', '')} - {detail}",
+                )
+            update_task_progress(task_id, message=f"Evaluator finished with status {test_status}; trying download", pct=40)
+        
+        # Step 3: Download results
+        on_progress("Step 3/5: Downloading results...")
+        update_task_progress(task_id, message="Downloading results...", pct=45)
+        
+        download_deadline = time.time() + download_ready_timeout
+        while True:
+            try:
+                dl_result = download_core.run_download_results(
+                    project_id=project_id,
+                    job_id=job_id,
+                    suite_id=None,
+                    output_path=output_path,
+                    download_type=download_type,
+                    phase=phase,
+                    skip_large_file=skip_large_file,
+                    large_file_mb=large_file_mb,
+                    keep_zip_files=keep_zip_files,
+                    suite_ids=suite_ids,
+                    on_progress=on_progress,
+                    on_warning=on_warning,
+                )
+                failure_count, total_attempted, rows = dl_result
+                success_count = total_attempted - failure_count
+                download_success = success_count > 0
+                
+                if not download_success:
+                    evaluator_msg = ""
+                    if not evaluator_api.is_success_job_status(test_status):
+                        evaluator_msg = f" Evaluator status was {test_status}."
+                    _mark_run_status(
+                        task_id,
+                        parameters,
+                        task_type="run_evaluator_and_process",
+                        status="failed",
+                        error_message=f"Download failed: {failure_count} of {total_attempted} scenarios failed.{evaluator_msg}",
+                        result_path=output_path,
+                    )
+                    update_task_status(task_id, "failed", 
+                        error_message=f"Download failed: {failure_count} of {total_attempted} scenarios failed.{evaluator_msg}")
+                    return
+                break
+
+            except RuntimeError as e:
+                if "No case reports found" not in str(e) or time.time() >= download_deadline:
+                    evaluator_msg = ""
+                    if not evaluator_api.is_success_job_status(test_status):
+                        evaluator_msg = (
+                            f" Evaluator status was {test_status}. "
+                            "This usually means the job failed before producing downloadable case logs."
+                        )
+                    _mark_run_status(
+                        task_id,
+                        parameters,
+                        task_type="run_evaluator_and_process",
+                        status="failed",
+                        error_message=f"Download failed: {e}{evaluator_msg}",
+                        result_path=output_path,
+                    )
+                    update_task_status(task_id, "failed", error_message=f"Download failed: {e}{evaluator_msg}")
+                    return
+
+                wait_seconds = min(
+                    download_ready_poll_interval,
+                    max(1.0, download_deadline - time.time()),
+                )
+                msg = f"Case reports are not ready yet; retrying download in {wait_seconds:.0f}s"
+                append_task_log(task_id, f"{msg}. Detail: {e}")
+                update_task_progress(task_id, message=msg, pct=45)
+                time.sleep(wait_seconds)
+                
+            except Exception as e:
+                _mark_run_status(
+                    task_id,
+                    parameters,
+                    task_type="run_evaluator_and_process",
+                    status="failed",
+                    error_message=f"Download failed: {e}",
+                    result_path=output_path,
+                )
+                update_task_status(task_id, "failed", error_message=f"Download failed: {e}")
+                return
+        
+        update_task_progress(task_id, message=f"Download complete: {success_count}/{total_attempted} succeeded", pct=60)
+        summary["download_summary"] = {
+            "total": total_attempted,
+            "success": success_count,
+            "failed": failure_count,
+        }
+        summary["download_rows"] = rows[:500]
+        update_task_result_summary(task_id, summary)
+        _update_run_metadata(
+            task_id,
+            parameters,
+            task_type="run_evaluator_and_process",
+            extra={
+                "download": {
+                    "mode": "run_evaluator_and_process",
+                    "total": total_attempted,
+                    "success": success_count,
+                    "failed": failure_count,
+                    "download_type": download_type,
+                    "phase": phase,
+                    "skip_large_file": bool(skip_large_file),
+                    "large_file_mb": large_file_mb,
+                    "keep_zip_files": bool(keep_zip_files),
+                    "rows": rows[:100],
+                }
+            },
+        )
+        
+        # Step 4: Run eval
+        if run_eval:
+            on_progress("Step 4/5: Running evaluation...")
+            update_task_progress(task_id, message="Running evaluation...", pct=65)
+            
+            target_dirs = eval_summary.find_eval_result_dirs(output_path, recursive=eval_recursive)
+            if target_dirs:
+                total = len(target_dirs)
+                eval_statuses = _run_eval_result_dirs(
+                    task_id=task_id,
+                    eval_summary=eval_summary,
+                    target_dirs=target_dirs,
+                    overwrite=eval_overwrite,
+                    eval_workers=_eval_worker_count(parameters, total),
+                    pct_start=65.0,
+                    pct_end=85.0,
+                    label="Eval",
+                )
+                
+                # Generate summary CSVs
+                update_task_progress(task_id, message="Generating Summary.csv / Score.csv", pct=85)
+                csv_info = eval_summary.generate_summary_and_score_csv(output_path)
+                failed = [s for s in eval_statuses if s.get("status") == "failed"]
+                skipped = [s for s in eval_statuses if s.get("status") == "skipped"]
+                succeeded = [s for s in eval_statuses if s.get("status") == "success"]
+                
+                eval_result_summary = {
+                    "directories_processed": total,
+                    "success": len(succeeded),
+                    "failed": len(failed),
+                    "skipped": len(skipped),
+                    "summary_path": csv_info.get("summary_path", output_path),
+                    "summary_rows": csv_info.get("summary_rows", 0),
+                    "score_rows": csv_info.get("score_rows", 0),
+                }
+                append_task_log(task_id, f"Eval complete: {len(succeeded)}/{total} succeeded")
+            else:
+                eval_result_summary = {"directories_processed": 0, "success": 0, "failed": 0, "skipped": 0}
+                append_task_log(task_id, "No eval result directories found")
+        else:
+            eval_result_summary = {}
+        
+        update_task_progress(task_id, message="Evaluation complete", pct=85)
+        summary["eval_summary"] = eval_result_summary
+        update_task_result_summary(task_id, summary)
+        _update_run_metadata(
+            task_id,
+            parameters,
+            task_type="run_evaluator_and_process",
+            extra={
+                "evaluation": {
+                    **eval_result_summary,
+                    "enabled": bool(run_eval),
+                    "recursive": bool(eval_recursive),
+                    "overwrite": bool(eval_overwrite),
+                }
+            },
+        )
+        
+        # Step 5: Generate parquet
+        parquet_path = ""
+        if generate_parquet and pkl_archive_to_parquet:
+            on_progress("Step 5/5: Generating parquet...")
+            update_task_progress(task_id, message="Generating parquet...", pct=90)
+            
+            try:
+                parquet_path = pkl_archive_to_parquet(
+                    output_path,
+                    on_progress=_parquet_progress_callback(
+                        task_id,
+                        prefix="Parquet",
+                        pct_start=90,
+                        pct_end=99,
+                    ),
+                    on_skip=lambda path, reason: append_task_log(
+                        task_id,
+                        f"Parquet skipped {path}: {reason}",
+                    ),
+                    project_id=project_id,
+                    job_id=job_id,
+                )
+                update_task_progress(task_id, message="Parquet generated", pct=99)
+                append_task_log(task_id, f"Parquet generated: {parquet_path}")
+            except Exception as e:
+                append_task_log(task_id, f"Parquet generation failed: {e}")
+                parquet_path = ""
+        
+        update_task_progress(task_id, message="All steps complete", pct=100)
+        summary["parquet_path"] = parquet_path
+        
+        # Build final summary
+        update_task_result_summary(task_id, summary)
+        _update_run_metadata(
+            task_id,
+            parameters,
+            task_type="run_evaluator_and_process",
+            extra={
+                "parquet": {
+                    "enabled": bool(generate_parquet),
+                    "path": parquet_path,
+                }
+            },
+        )
+        if evaluator_api.is_success_job_status(test_status):
+            append_task_log(task_id, "Workflow complete!")
+        else:
+            append_task_log(task_id, "Workflow complete. Evaluator job had failed test cases, but downloadable results were processed.")
+        _mark_run_status(
+            task_id,
+            parameters,
+            task_type="run_evaluator_and_process",
+            status="completed",
+            result_path=output_path,
+        )
+        update_task_status(task_id, "completed", result_path=output_path)
+        
     except Exception as e:
         append_task_log(task_id, f"Failed: {e}")
+        _mark_run_status(task_id, parameters, task_type="run_evaluator_and_process", status="failed", error_message=str(e))
         update_task_status(task_id, "failed", error_message=str(e))
         raise
 
@@ -277,6 +2548,9 @@ def job_download_scenarios(task_id: str, parameters: Dict[str, Any]) -> None:
     "build_parquet": job_build_parquet,
     "download_results": job_download_results,
     "download_scenarios": job_download_scenarios,
+    "download_and_eval": job_download_and_eval,
+    "run_release_specsheet_workflow": job_run_release_specsheet_workflow,
+    "run_evaluator_and_process": job_run_evaluator_and_process,
 }