From 8509f65f6e7362b392f50a6bad78c719e426345c Mon Sep 17 00:00:00 2001
From: Diego Mauricio Lagos <diego.lagosmorales@pagopa.it>
Date: Thu, 18 Jun 2026 22:10:29 +0200
Subject: [PATCH 01/11] feat: Enhance internal gateway writing plans with
 clarity on token management and file structuring

---
 .../internal-gateway-writing-plans/SKILL.md   | 19 +++++++++++++++++--
 .../references/plan-review-gate.md            |  2 +-
 .../scripts/plan_authoring.py                 |  9 +++++++--
 tests/test_plan_policy_contract.py            |  4 ++++
 4 files changed, 29 insertions(+), 5 deletions(-)

diff --git a/.github/skills/internal-gateway-writing-plans/SKILL.md b/.github/skills/internal-gateway-writing-plans/SKILL.md
index f2b456a..e7a6a4e 100644
--- a/.github/skills/internal-gateway-writing-plans/SKILL.md
+++ b/.github/skills/internal-gateway-writing-plans/SKILL.md
@@ -44,7 +44,12 @@ New `compact` plans should use `tmp/superpowers/mini-plan-*`.
 | Profile | When | Required files |
 | --- | --- | --- |
 | `compact` | Single owner, concrete target, one validation path, low-to-medium risk, and one execution lane. Best fit for small/fast executors after positive handoff validation. | `01-change-summary.md`, `02-execution.md` |
-| `extended` | Cross-family changes, higher risk, lower-context execution, multiple validators, or multi-slice execution state. Thinking-first profile with explicit control files and deterministic read order. | `01-change-summary.md`, `02-control.md`, `03-execution.md`, additional numbered files by category (`04-...`). |
+| `extended` | Cross-family changes, higher risk, lower-context execution, multiple validators, or multi-slice execution state. Soft-limit profile: use judgment-based size review with completeness over compression, explicit control files, and deterministic read order. | `01-change-summary.md`, `02-control.md`, `03-execution.md`, additional numbered files by category (`04-...`). |
+
+Escalate to `extended` when completeness risk is material: exports, reports, or
+datasets with non-trivial reconciliation; external API contracts
+(credentials, pagination, retries, schema pinning); executive-facing output;
+multiple validators; or synced always-on guidance edits.
 
 Do not use `compact` when the executor needs exact sources, target files,
 validators, blockers, or external pins that only `02-control.md`
@@ -70,6 +75,8 @@ can provide.
 - Compact plans have a 2,000 estimated-token total budget measured as
   `ceil(UTF-8 bytes / 4)` across plan Markdown files. Keep `02-execution.md`
   under 1,500 estimated tokens. Treat warnings as required review inputs.
+- For `extended`, treat token warnings as review inputs for completeness and
+  slicing. Prefer splitting into numbered files over compression.
 - `compact` uses exactly `01-change-summary.md` and `02-execution.md` during
   authoring. `extended` uses `01-change-summary.md`, `02-control.md`,
   `03-execution.md`, and optional higher numbered files.
@@ -98,6 +105,11 @@ can provide.
 - For `extended`, implementation-contract sections are merged into `02-control.md`
   with these exact headings: `Sources`, `Candidate targets`,
   `Validation commands`, `Blockers and fallback rules`, and `External pins`.
+- For `extended`, recommend adding deep companion files only when justified by
+  triggers, and keep them as recommendations (not ERROR-level required files):
+  `data-contract.md` for reconciled datasets and schema mappings,
+  `validation-runbook.md` for multi-validator troubleshooting or rollback paths,
+  and API/schema pin notes when external dependencies or credentials drive risk.
 - Apply a say-once rule: each control fact (target, owner, validator, blockers,
   pins, and source-item coverage) is written once in the owning file, and step
   files do not restate target/owner/validator.
@@ -126,7 +138,10 @@ can provide.
   `questions.md` file for `compact`.
 11. Run scope challenge and plan review gate for non-trivial plans.
 12. Run `audit` first, then run `handoff-check`; execute only when ready.
-13. Treat token warnings as review inputs for compression or split decisions, not as proof of measured savings.
+13. Treat token warnings as review inputs, not as proof of measured savings. For
+  `extended`, prefer splitting into numbered files over compression, and never
+  compress away source pins, schema contracts, validation rules, stop
+  conditions, or failure-investigation steps.
 
 ## Validation
 
diff --git a/.github/skills/internal-gateway-writing-plans/references/plan-review-gate.md b/.github/skills/internal-gateway-writing-plans/references/plan-review-gate.md
index 32d2ff9..d3da491 100644
--- a/.github/skills/internal-gateway-writing-plans/references/plan-review-gate.md
+++ b/.github/skills/internal-gateway-writing-plans/references/plan-review-gate.md
@@ -33,7 +33,7 @@ or handoff. It checks clarity and validity without creating reviewer personas.
 | Open questions | Is `questions.md` present and set to `- none` for execution handoff, or explicitly blocking handoff? |
 | Lifecycle status | Is plan state explicit (`scaffold`, `ready`, or `closed`) so an executor does not infer readiness? |
 | Token discipline | Does the ledger define `Initial evidence pass` and `Reading budget` so the executor can classify the folder with the fewest safe reads? |
-| Profile token budget | Is `compact` within the 2,000 estimated-token total budget, with `01-change-summary.md` under 300 and `02-execution.md` under 1,500, or escalated to `extended`? |
+| Profile token budget | Is `compact` within the 2,000 estimated-token total budget, with `01-change-summary.md` under 300 and `02-execution.md` under 1,500, or escalated to `extended`? For `extended`, are soft limits reviewed with completeness over compression and split-by-slice decisions when files grow large? |
 
 ## Outcomes
 
diff --git a/.github/skills/internal-gateway-writing-plans/scripts/plan_authoring.py b/.github/skills/internal-gateway-writing-plans/scripts/plan_authoring.py
index bc09161..6fc4b05 100644
--- a/.github/skills/internal-gateway-writing-plans/scripts/plan_authoring.py
+++ b/.github/skills/internal-gateway-writing-plans/scripts/plan_authoring.py
@@ -709,11 +709,16 @@ def _token_warnings(plan_folder: Path, profile: str | None = None) -> list[str]:
         control_names = {"01-change-summary.md", "02-control.md"}
         control_tokens = sum(tokens for name, tokens in file_tokens if name in control_names)
         if total_tokens and control_tokens / total_tokens > 0.7:
-            warnings.append("Initial control read is disproportionately large; compress or split the control files.")
+            warnings.append("Initial control read is disproportionately large; prefer splitting control facts into numbered files by delivery slice.")
 
     for name, tokens in file_tokens:
         if tokens > 1200:
-            warnings.append(f"Estimated token weight is high for {name}; split or compress by delivery slice.")
+            if profile == "extended":
+                warnings.append(
+                    f"Informational: estimated token weight is high for {name}; prefer splitting into numbered files by delivery slice."
+                )
+            else:
+                warnings.append(f"Estimated token weight is high for {name}; split or compress by delivery slice.")
 
     return warnings
 
diff --git a/tests/test_plan_policy_contract.py b/tests/test_plan_policy_contract.py
index 616bdc1..d584400 100644
--- a/tests/test_plan_policy_contract.py
+++ b/tests/test_plan_policy_contract.py
@@ -28,6 +28,10 @@ def test_writing_plans_declares_profile_only_handoff_contract() -> None:
     assert "mini-plan-*" in compact_reference
     assert "Decisioni aperte" in compact_reference
     assert "2,000 estimated tokens" in compact_reference
+    assert "completeness over compression" in writing_text
+    assert "Escalate to `extended`" in writing_text
+    assert "prefer splitting into numbered files over compression" in writing_text
+    assert "data-contract.md" in writing_text
 
 
 def test_executing_plans_accepts_compact_and_extended_consumers() -> None:

From e4fc1177b756645a1768064dc955e6e3b2a053b8 Mon Sep 17 00:00:00 2001
From: Diego Mauricio Lagos <diego.lagosmorales@pagopa.it>
Date: Thu, 18 Jun 2026 22:13:35 +0200
Subject: [PATCH 02/11] feat: Improve Python version handling in virtual
 environment setup

---
 tools/analyze_copilot_debug_log/run.sh | 33 ++++++++++++++++++--------
 1 file changed, 23 insertions(+), 10 deletions(-)

diff --git a/tools/analyze_copilot_debug_log/run.sh b/tools/analyze_copilot_debug_log/run.sh
index d1a080c..dc28c56 100755
--- a/tools/analyze_copilot_debug_log/run.sh
+++ b/tools/analyze_copilot_debug_log/run.sh
@@ -60,8 +60,10 @@ load_required_python_version() {
 
 select_python_bin() {
     if [[ -n "$PYTHON_BIN" ]]; then
+        PYTHON_BIN_EXPLICIT=1
         return
     fi
+    PYTHON_BIN_EXPLICIT=0
     PYTHON_BIN="python$REQUIRED_PYTHON_MAJOR_MINOR"
 }
 
@@ -70,6 +72,12 @@ verify_python_bin_version() {
     actual_version="$("$PYTHON_BIN" -c 'import sys; print(f"{sys.version_info.major}.{sys.version_info.minor}")')"
 
     if [[ "$actual_version" == "$REQUIRED_PYTHON_MAJOR_MINOR" ]]; then
+        EXPECTED_PYTHON_MAJOR_MINOR="$REQUIRED_PYTHON_MAJOR_MINOR"
+        return
+    fi
+
+    if [[ "$PYTHON_BIN_EXPLICIT" -eq 1 ]]; then
+        EXPECTED_PYTHON_MAJOR_MINOR="$actual_version"
         return
     fi
 
@@ -82,27 +90,30 @@ verify_venv_version() {
     local venv_version
 
     if [[ ! -x "$venv_python" ]]; then
-        log_error "virtual environment is missing its Python interpreter: $venv_python"
-        exit 1
+        return 1
     fi
 
-    venv_version="$("$venv_python" -c 'import sys; print(f"{sys.version_info.major}.{sys.version_info.minor}")')"
-    if [[ "$venv_version" == "$REQUIRED_PYTHON_MAJOR_MINOR" ]]; then
-        return
+    venv_version="$("$venv_python" -c 'import sys; print(f"{sys.version_info.major}.{sys.version_info.minor}")')"
+    if [[ "$venv_version" == "$EXPECTED_PYTHON_MAJOR_MINOR" ]]; then
+        return 0
     fi
 
-    log_error "existing virtual environment uses Python $venv_version, but .python-version requires $REQUIRED_PYTHON_VERSION. Remove $VENV_DIR and rerun."
-    exit 1
+    return 1
 }
 
 ensure_venv() {
     if [[ -d "$VENV_DIR" ]]; then
-        verify_venv_version
-        return
+        if verify_venv_version; then
+            return
+        fi
+        rm -rf "$VENV_DIR"
     fi
 
     "$PYTHON_BIN" -m venv "$VENV_DIR"
-    verify_venv_version
+    if ! verify_venv_version; then
+        log_error "virtual environment is missing its Python interpreter or uses an unexpected Python version after creation: $VENV_DIR"
+        exit 1
+    fi
 }
 
 install_dependencies() {
@@ -151,6 +162,8 @@ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 REPO_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
 VENV_DIR="$SCRIPT_DIR/.venv"
 PYTHON_BIN="${PYTHON_BIN:-}"
+PYTHON_BIN_EXPLICIT=0
+EXPECTED_PYTHON_MAJOR_MINOR=""
 PYTHON_VERSION_FILE="$REPO_ROOT/.python-version"
 REQUIREMENTS_FILE="$SCRIPT_DIR/requirements.txt"
 REQUIREMENTS_HASH_FILE="$VENV_DIR/.requirements.sha256"

From 492f7b1e67a2fba07fba793354ad5f9802e24494 Mon Sep 17 00:00:00 2001
From: Diego Mauricio Lagos <diego.lagosmorales@pagopa.it>
Date: Thu, 18 Jun 2026 22:26:26 +0200
Subject: [PATCH 03/11] feat: Add counter-validation requirements to internal
 gateway review and update tests

---
 .github/skills/internal-gateway-review/SKILL.md           | 8 +++++++-
 .../internal-gateway-review/references/review-gate.md     | 7 +++++--
 tests/test_workflow_review_contract.py                    | 2 ++
 3 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/.github/skills/internal-gateway-review/SKILL.md b/.github/skills/internal-gateway-review/SKILL.md
index 53834ce..426194a 100644
--- a/.github/skills/internal-gateway-review/SKILL.md
+++ b/.github/skills/internal-gateway-review/SKILL.md
@@ -17,6 +17,11 @@ Portable review orchestrator. This skill owns review scope, lens selection,
 findings consolidation, critical support, and remediation-plan transition. It
 does not apply fixes.
 
+Before any user-visible review verdict, run a counter-validation pass that
+challenges the draft analysis for missing evidence, false positives, severity
+inflation, route errors, and ignored contrary evidence. Revise or reopen the
+review before presenting the analysis when the critique exposes a material gap. Use `internal-gateway-critical-master` for this critique.
+
 See `references/review-gate.md` for the review output contract and gate states.
 
 ## When to use
@@ -28,6 +33,7 @@ See `references/review-gate.md` for the review output contract and gate states.
 
 - Findings stay defect-first.
 - Review flow preserves compact context: prioritize diff and failing evidence first, then expand only when an evidence gap remains.
-- Review output carries findings, severity, confidence, evidence gap, route or next owner, and a Review Gate outcome before the final verdict.
+- Review output carries findings, severity, confidence, evidence gap, counter-validation result, route or next owner, and a Review Gate outcome before the final verdict.
+- The review cannot present analysis to the user until counter-validation confirms it or reopens material gaps.
 - Retained remediation plans are authored by `internal-gateway-writing-plans`.
 - The gateway stops before fixes.
diff --git a/.github/skills/internal-gateway-review/references/review-gate.md b/.github/skills/internal-gateway-review/references/review-gate.md
index bb3e0f1..cb6f17d 100644
--- a/.github/skills/internal-gateway-review/references/review-gate.md
+++ b/.github/skills/internal-gateway-review/references/review-gate.md
@@ -8,15 +8,18 @@ Use this reference when `internal-gateway-review` needs to package findings befo
 - Severity
 - Confidence
 - Evidence gap
+- Counter-validation
 - Route or next owner
 - Review Gate outcome
 
 ## Gate States
 
-- `review gate: satisfied` when the findings are specific, routed, and ready for the user-visible verdict.
-- `review gate: reopen` when material evidence is missing or the remediation choice needs more challenge.
+- `review gate: satisfied` when the findings are specific, routed, counter-validated, and ready for the user-visible verdict.
+- `review gate: reopen` when material evidence is missing, counter-validation exposes a material flaw, or the remediation choice needs more challenge.
 
 ## Boundary
 
 - Keep the gate visible before any fixes.
+- Run counter-validation before the final user-visible verdict; challenge each finding for evidence, severity, route, and contrary proof.
+- Report only material self-critique results: corrections, confidence changes, evidence gaps, or confirmation that no material issue was found.
 - Use the gate to route each actionable finding to the smallest next owner.
diff --git a/tests/test_workflow_review_contract.py b/tests/test_workflow_review_contract.py
index 50bb3c2..6d36a85 100644
--- a/tests/test_workflow_review_contract.py
+++ b/tests/test_workflow_review_contract.py
@@ -57,6 +57,8 @@ def test_review_gateway_exists_and_stops_before_fixes() -> None:
     assert "severity" in review_gate_lower
     assert "confidence" in review_gate_lower
     assert "evidence gap" in review_gate_lower
+    assert "counter-validation" in skill_text
+    assert "counter-validation" in review_gate_lower
     assert "route or next owner" in review_gate_lower
 
 

From 653beb43e4fcdbb94c173f11a53c421be1f5f2ed Mon Sep 17 00:00:00 2001
From: Diego Mauricio Lagos <diego.lagosmorales@pagopa.it>
Date: Fri, 19 Jun 2026 14:22:28 +0200
Subject: [PATCH 04/11] feat: Enhance Python project and script guidelines with
 new logging, reporting, and dependency management practices

---
 .../references/anti-patterns-python.md        |   3 +-
 .../skills/internal-python-project/SKILL.md   |  16 ++-
 .../references/common-mistakes.md             |   2 +
 .../references/logging-and-reporting.md       | 101 ++++++++++++++++
 .../skills/internal-python-script/SKILL.md    |  19 ++-
 .../references/common-mistakes.md             |   2 +
 .../references/layout-and-templates.md        |  42 +++++--
 .../references/reporting.md                   | 114 ++++++++++++++++++
 .github/skills/internal-python/SKILL.md       |   3 +
 ...est_repository_workflow_policy_contract.py |  16 +++
 10 files changed, 299 insertions(+), 19 deletions(-)
 create mode 100644 .github/skills/internal-python-project/references/logging-and-reporting.md
 create mode 100644 .github/skills/internal-python-script/references/reporting.md

diff --git a/.github/skills/internal-code-review/references/anti-patterns-python.md b/.github/skills/internal-code-review/references/anti-patterns-python.md
index 26f7b69..53df0c9 100644
--- a/.github/skills/internal-code-review/references/anti-patterns-python.md
+++ b/.github/skills/internal-code-review/references/anti-patterns-python.md
@@ -23,6 +23,7 @@ Baseline owner: `internal-python`
 | PY-M07 | `print()` instead of `logging` in application/library code | No log level control in production |
 | PY-M08 | Missing unit tests for new public functions | Violates test coverage mandate |
 | PY-M09 | Python tests outside repository-root `tests/` or without mirrored source paths | Breaks repository test discoverability and ownership mapping |
+| PY-M10 | `rich`, emoji, tables, or panels outside human-facing CLI/reporting boundaries | Mixes terminal UI with importable logic or machine-readable output such as JSON |
 
 ## Minor
 
@@ -89,5 +90,5 @@ import logging
 logger = logging.getLogger(__name__)
 
 def process(data: list[dict]) -> None:
-    logger.info("ℹ️ Processing %d items", len(data))
+    logger.info("Processing %d items", len(data))
 ```
diff --git a/.github/skills/internal-python-project/SKILL.md b/.github/skills/internal-python-project/SKILL.md
index fa44785..509244e 100644
--- a/.github/skills/internal-python-project/SKILL.md
+++ b/.github/skills/internal-python-project/SKILL.md
@@ -5,6 +5,11 @@ description: Use when creating or modifying Python package or application code w
 
 # Python Project Skill
 
+## Referenced skills
+
+- `internal-python-script`: route CLI adapters, direct operator execution, and rich console reporting boundaries.
+- `internal-tdd`: load for bugfixes, features, or project behavior changes with a meaningful public or service seam.
+
 ## When to use
 
 - Services, use cases, adapters, packages, and modules in Python applications.
@@ -38,17 +43,23 @@ description: Use when creating or modifying Python package or application code w
 - Choose async only when the workload is I/O-bound and the surrounding stack supports it cleanly.
 - Keep request or transport models, domain logic, and persistence concerns in separate modules.
 - Prefer a domain/service/adapter decomposition before adding generic catch-all modules.
-- Keep reusable module and service logs neutral or structured; reserve emoji log formatting for outer operator-facing entrypoints.
+- Keep reusable module and service logs neutral, structured, or framework-native. Log events should be parsable, searchable, and useful in production.
+- Design professional reporting as a boundary concern: core project code returns typed results, events, or DTOs; adapters decide whether to render JSON, HTTP responses, framework logs, metrics, or human-facing CLI reports.
+- No emoji or `rich` rendering inside importable domain, service, persistence, framework modules, or machine-readable output paths such as JSON. Use `rich` only in human-facing CLI adapter reporting.
+- If a project exposes a CLI adapter, keep the CLI adapter thin and route its operator-facing reporting to the script boundary. A CLI adapter may use an `ExecutionReporter`; the core project code should not know that reporter exists.
 
 Load `references/examples.md` when you need a minimal module or test example.
 
+Load `references/logging-and-reporting.md` when project code needs a professional logging/reporting layout, structured log context, result DTOs, adapter-owned rendering, or JSON versus human-output boundaries.
+
 ## Testing
 
 - Follow the repository pytest defaults.
 - BDD-like names: `given_when_then` style.
 - Prefer fixtures, parameterization, and mocking only when they reduce duplication or isolate real external boundaries.
 - Use coverage reports to close meaningful behavioral gaps, not as a blanket 100% doctrine.
-- For modify tasks: edit implementation first, run existing tests, then update tests only for intentional behavior changes.
+- For bugfixes, features, and intentional behavior changes, start test-first through the public API, service boundary, adapter contract, or framework-owned seam: add or update the failing test, confirm it fails for the intended reason, then implement the smallest fix.
+- For refactors, prose-only updates, generated fixtures, or mechanical formatting with no executable behavior change, run existing focused tests and syntax validation instead of manufacturing speculative tests.
 
 ## Architecture and framework guidance
 
@@ -70,5 +81,6 @@ Load `references/common-mistakes.md` for the full mistake table.
 ## Validation
 
 - `python -m compileall <paths>` (syntax check)
+- `pip install --require-hashes -r requirements.txt` (dependency integrity check, only when requirements change)
 - `pytest tests/` (run tests)
 - Lint with project's configured linter.
diff --git a/.github/skills/internal-python-project/references/common-mistakes.md b/.github/skills/internal-python-project/references/common-mistakes.md
index 592bdf6..a4c1eb4 100644
--- a/.github/skills/internal-python-project/references/common-mistakes.md
+++ b/.github/skills/internal-python-project/references/common-mistakes.md
@@ -6,8 +6,10 @@
 | Mutable default arguments (`def f(items=[])`) | Shared state between calls — classic Python gotcha | Use `None` default + create inside function |
 | Bare `except:` or `except Exception:` | Swallows `KeyboardInterrupt`, `SystemExit` | Catch specific exceptions |
 | No type hints on public API | Hard to understand contracts, no static analysis | Add type hints on function signatures |
+| Updating dependency requirements without refreshed hashes | Reproducible installs break or drift silently | Regenerate exact pins and hashes, then validate with `pip install --require-hashes -r requirements.txt` |
 | Tests that depend on execution order | Fragile test suite, non-deterministic failures | Each test must be self-contained |
 | Forcing async into CPU-bound or simple flows | Adds complexity without throughput benefit | Keep it synchronous unless I/O concurrency is the real bottleneck |
 | Mocking internal implementation details | Makes tests brittle and hides real regressions | Mock only true external boundaries |
+| Using `rich`, emoji, tables, or panels outside human-facing CLI adapter reporting | Mixes terminal UI with project behavior or machine-readable output such as JSON | Keep project logs neutral or structured, keep data output plain, and put `rich` reporting in a CLI adapter |
 | Treating line coverage as the goal | Inflates test volume without improving defect detection | Target coverage around changed behavior and risky paths |
 | God classes with 10+ methods | Hard to test, hard to reason about | Split by responsibility into focused classes |
diff --git a/.github/skills/internal-python-project/references/logging-and-reporting.md b/.github/skills/internal-python-project/references/logging-and-reporting.md
new file mode 100644
index 0000000..682e833
--- /dev/null
+++ b/.github/skills/internal-python-project/references/logging-and-reporting.md
@@ -0,0 +1,101 @@
+# Python Project Logging And Reporting
+
+Use this reference when Python project code needs professional logging, reporting layout, structured log context, result DTOs, adapter-owned rendering, or a clear boundary between JSON/data output and human-facing CLI reporting.
+
+## Boundary
+
+- Project internals should expose behavior through typed results, domain events, DTOs, return values, exceptions, or framework contracts.
+- Domain, service, persistence, and framework modules should use standard `logging` or the repository framework's native logging.
+- Logs from importable modules should be neutral, structured when useful, and parsable in production.
+- Human-facing rendering belongs to adapters: CLI, admin command, report command, or delivery script.
+- Machine-readable outputs such as JSON, API responses, event payloads, or exported files must stay plain data. Do not decorate them with `rich`, emoji, color, panels, or tables.
+- A CLI adapter may use the script `ExecutionReporter` pattern or `rich`, but the project core should not import or know about that reporter.
+
+## Professional Layout
+
+Prefer this ownership split when the project needs both reusable behavior and operator-facing reporting:
+
+```text
+src/{package}/
+├── domain/          # entities, value objects, domain rules; no logging UI
+├── services/        # use cases; structured logging and typed results
+├── adapters/
+│   ├── cli.py       # optional human-facing rendering boundary
+│   ├── http.py      # framework/API response boundary
+│   └── persistence.py
+└── observability.py # logger setup helpers only when the project owns setup
+```
+
+Use existing repository structure first. Do not create these folders just to satisfy the shape when the current project has a clearer convention.
+
+## Logging Shape
+
+Use stable event names and explicit context. Prefer values that help production search, alerting, and diagnosis.
+
+```python
+from __future__ import annotations
+
+import logging
+from dataclasses import dataclass
+from pathlib import Path
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass(frozen=True)
+class ImportSummary:
+    imported_count: int
+    skipped_count: int
+    output_path: Path
+
+
+def import_records(source_path: Path, output_path: Path) -> ImportSummary:
+    logger.info(
+        "records_import_started",
+        extra={"source_path": source_path.as_posix(), "output_path": output_path.as_posix()},
+    )
+
+    summary = ImportSummary(imported_count=12, skipped_count=1, output_path=output_path)
+
+    logger.info(
+        "records_import_completed",
+        extra={
+            "imported_count": summary.imported_count,
+            "skipped_count": summary.skipped_count,
+            "output_path": summary.output_path.as_posix(),
+        },
+    )
+    return summary
+```
+
+## Adapter Rendering
+
+Adapters translate project results into the output contract for that boundary.
+
+```python
+def summary_to_json(summary: ImportSummary) -> dict[str, object]:
+    return {
+        "imported_count": summary.imported_count,
+        "skipped_count": summary.skipped_count,
+        "output_path": summary.output_path.as_posix(),
+    }
+
+
+def render_human_summary(summary: ImportSummary, reporter: object) -> None:
+    reporter.summary(
+        status="completed",
+        counts={"imported": summary.imported_count, "skipped": summary.skipped_count},
+        produced_files=[summary.output_path],
+        diagnostics=[],
+    )
+```
+
+The JSON adapter returns plain data. The human adapter may use `ExecutionReporter` or `rich` if the CLI/reporting boundary owns that dependency.
+
+## Review Checklist
+
+- Does the core return typed results or framework-native responses instead of printing?
+- Are logs searchable and useful without terminal formatting?
+- Are secrets, tokens, bearer values, passwords, credentials, and sensitive payloads omitted or redacted?
+- Is JSON or other machine-readable output plain data?
+- If `rich` appears, is it isolated to a human-facing CLI/reporting adapter with a dependency decision note?
diff --git a/.github/skills/internal-python-script/SKILL.md b/.github/skills/internal-python-script/SKILL.md
index 95ddd2b..678f3a5 100644
--- a/.github/skills/internal-python-script/SKILL.md
+++ b/.github/skills/internal-python-script/SKILL.md
@@ -5,6 +5,11 @@ description: Use when creating or modifying standalone Python scripts, CLIs, or
 
 # Python Script Skill
 
+## Referenced skills
+
+- `internal-python-project`: route away when imported package, application, service, or framework behavior becomes the primary contract.
+- `internal-tdd`: load for bugfixes, features, or script behavior changes with a meaningful executable seam.
+
 ## When to use
 
 - New standalone Python scripts.
@@ -32,12 +37,15 @@ description: Use when creating or modifying standalone Python scripts, CLIs, or
 - For operator-facing script work, crossing the 400-line threshold should move toward a toolkit or project structure according to the primary contract, not an ever-growing single entrypoint.
 - Keep policy checks focused on maintained source; generated outputs and large fixture data are excluded unless directly edited.
 - Prefer `argparse`, `pathlib.Path`, and small helper functions for operator-facing tools.
-- Keep emoji logs at operator-facing boundaries such as start, success, warning, and failure states; keep reusable helpers free of decorative log formatting.
+- Keep operator-facing console reporting centralized in a dedicated reporter, for example `ExecutionReporter`. Application logic should call semantic reporter methods instead of constructing styled strings or scattered `print()` calls.
+- Use `rich` as the preferred console rendering library for polished human-facing CLI reports when the terminal experience is part of the contract. Keep it out of `--format json`, other machine-readable outputs, and reusable helper logic.
+- Keep emoji, panels, tables, and color at human-facing boundaries such as banners, sections, success, warning, error, and summaries. Keep reusable helpers and machine-readable output paths free of decorative log formatting.
+- Load `references/reporting.md` when a script needs professional console reporting, `rich` rendering, an `ExecutionReporter` shape, redaction rules, or verbose/debug output boundaries.
 - When a tool can be called from subdirectories, resolve the repository root explicitly instead of assuming the current working directory.
 - Use type hints on non-trivial public helpers and CLI-facing boundaries.
 - Use `asyncio` only when the script truly coordinates multiple I/O-bound tasks.
 - Reach for `pathlib`, context managers, and small helper functions before adding framework-like structure to a script.
-- Add machine-readable output such as `--format json` only when the tool has a real automation consumer. Keep text output as the default operator path.
+- Add machine-readable output such as `--format json` only when the tool has a real automation consumer. Keep text output as the default operator path, and do not decorate machine-readable output with `rich`, emoji, color, or tables.
 - When machine-readable output can become large and the script is agent-facing, add a bounded mode such as `--format compact` that preserves status, blocker or finding counts, key path evidence, and next action without dumping full detail.
 - Keep full `--format json` available for durable audit/debug use; do not replace it with compact mode.
 
@@ -64,12 +72,15 @@ Dependency decision note
 - Keep the note short and task-specific.
 - Compare the standard library with realistic third-party candidates.
 - If the final choice uses external libraries, create or update the local `requirements.txt` before finishing the task.
+- Keep exact pins and current hashes in `requirements.txt`. Use `pip-compile --generate-hashes` or an equivalent repository-approved workflow, then validate with `pip install --require-hashes -r requirements.txt` when the requirements file changes.
 - If several entrypoints share the same lock file, record the decision once at the shared toolkit `requirements.txt` rather than repeating it in every script.
 
 ## Layout and templates
 
 Load `references/layout-and-templates.md` when you need the default folder layout, a repo-aligned multi-tool toolkit layout, a minimal entry point, a hash-locked `requirements.txt`, or the launcher pattern.
 
+Load `references/reporting.md` when the script needs a richer `ExecutionReporter`, `rich` console rendering, status tables, redaction behavior, or a final operator summary.
+
 Keep these rules visible while drafting:
 
 - Use a dedicated tool folder or toolkit root rather than a loose top-level `.py` file.
@@ -82,7 +93,8 @@ Keep these rules visible while drafting:
 
 - Follow the repository pytest defaults.
 - Use coverage reports to inspect missing behavior on touched code, not to force blanket 100% coverage.
-- For modify tasks: edit implementation first, run existing tests, then update tests only for intentional behavior changes.
+- For bugfixes, features, and intentional behavior changes, start test-first through the public CLI or stable helper seam: add or update the failing test, confirm it fails for the intended reason, then implement the smallest fix.
+- For refactors, prose-only updates, generated fixtures, or mechanical formatting with no executable behavior change, run the existing focused tests plus `py_compile` or `compileall` instead of manufacturing speculative tests.
 - Prefer existing repository commands such as `make lint`, `make test`, or a shared script runner before inventing a one-off validation path.
 
 ## Runtime guidance
@@ -99,5 +111,6 @@ Load `references/common-mistakes.md` for the full mistake table.
 
 - `python -m py_compile <script_name>.py` (syntax check)
 - `bash -n run.sh` (launcher syntax check, only when `run.sh` exists)
+- `pip install --require-hashes -r requirements.txt` (dependency integrity check, only when requirements change)
 - `pytest tests/` (run tests)
 - `python -m compileall <changed_paths>` or the repository's canonical shared runner when the tool already lives inside a maintained toolkit
diff --git a/.github/skills/internal-python-script/references/common-mistakes.md b/.github/skills/internal-python-script/references/common-mistakes.md
index d1f551c..e104cfd 100644
--- a/.github/skills/internal-python-script/references/common-mistakes.md
+++ b/.github/skills/internal-python-script/references/common-mistakes.md
@@ -9,6 +9,7 @@
 | No argument parsing | Caller has to modify script source to change behavior | Use `argparse` for any configurable parameter |
 | Installing deps globally or without hash-locked version pinning | Non-reproducible environment and hidden setup drift | Keep dependencies in the local `requirements.txt` with exact pins and hashes |
 | Adding an empty `requirements.txt` to a stdlib-only tool | Adds noise and implies missing setup steps | Omit `requirements.txt` when the script uses only the standard library |
+| Updating `requirements.txt` without refreshed hashes | Breaks reproducible installs and hides dependency drift | Regenerate exact pins and hashes, then validate with `pip install --require-hashes -r requirements.txt` |
 | Wrapping a stdlib-only script in Bash | Adds setup indirection without solving a real dependency problem | Document direct `python3 <script>.py` execution and skip the wrapper |
 | Shipping a loose `.py` file with undocumented setup steps | Users must guess how to run the tool safely | Generate a self-contained folder and add `run.sh` plus `requirements.txt` only when external packages are needed |
 | Treating a multi-entrypoint toolkit as app code just because it has `lib/` and tests | Pushes script tooling into the wrong guidance lane | Keep it in `internal-python-script` when the primary contract is still direct execution |
@@ -17,4 +18,5 @@
 | Adding JSON output without a real machine consumer | Increases surface area and maintenance cost | Keep text output first and add `--format json` only when automation needs it |
 | Defaulting to stdlib without comparing mature libraries | Leaves avoidable boilerplate, edge cases, and custom parsing logic in the script | Write the dependency decision note first and choose the option that makes the final code simpler |
 | Rejecting a useful dependency just to keep dependency count low | Optimizes the wrong thing and increases custom code | Optimize for simpler final code and justified value, not dependency minimization |
+| Adding `rich` because output looks nicer, without a human-facing reporting contract | Adds dependency cost without a clear user benefit and can corrupt JSON or other machine-readable output | Use `rich` for polished human-facing console reporting only, keep data output plain, and record the dependency decision |
 | Forcing async or framework abstractions into a simple tool | Raises complexity without improving the script | Keep the script synchronous and direct unless concurrency is essential |
diff --git a/.github/skills/internal-python-script/references/layout-and-templates.md b/.github/skills/internal-python-script/references/layout-and-templates.md
index 31f9895..79a89ee 100644
--- a/.github/skills/internal-python-script/references/layout-and-templates.md
+++ b/.github/skills/internal-python-script/references/layout-and-templates.md
@@ -37,7 +37,6 @@ repo-root/
 
 - Keep each entrypoint thin and import reusable helpers from the local `lib/` package.
 - Keep the dependency decision note and pinned hashes in the shared `requirements.txt`.
-- Use one shared `run.sh` to create or reuse `.venv`, install locked requirements, and dispatch to the selected Python entrypoint.
 
 ## Minimal Python Entry Point
 
@@ -52,32 +51,47 @@ import argparse
 import sys
 
 
-def log_info(msg: str) -> None:
-    print(f"ℹ️  {msg}")
+class ExecutionReporter:
+    def __init__(self, *, verbose: bool = False) -> None:
+        self.verbose = verbose
 
+    def _emit(self, message: str, *, error: bool = False) -> None:
+        stream = sys.stderr if error else sys.stdout
+        print(message, file=stream)
 
-def log_error(msg: str) -> None:
-    print(f"❌ {msg}", file=sys.stderr)
+    def detail(self, message: str) -> None:
+        if self.verbose:
+            self._emit(f"ℹ️  {message}")
 
+    def error(self, message: str) -> None:
+        self._emit(f"❌ {message}", error=True)
 
-def log_success(msg: str) -> None:
-    print(f"✅ {msg}")
+    def step(self, message: str) -> None:
+        self._emit(f"• {message}")
 
+    def success(self, message: str) -> None:
+        self._emit(f"✅ {message}")
 
-def main() -> None:
+
+def main() -> int:
     parser = argparse.ArgumentParser(description=__doc__)
     parser.add_argument("--target", required=True, help="Target to process")
+    parser.add_argument("--verbose", action="store_true", help="Show technical details.")
     args = parser.parse_args()
 
-    log_info(f"Processing {args.target}")
+    reporter = ExecutionReporter(verbose=args.verbose)
+    reporter.step(f"Processing {args.target}")
     # ... logic ...
-    log_success("Done")
+    reporter.success("Done")
+    return 0
 
 
 if __name__ == "__main__":
-    main()
+    raise SystemExit(main())
 ```
 
+For richer operator-facing output, use `references/reporting.md` and replace this stdlib reporter with a `rich`-backed `ExecutionReporter` after recording the dependency decision.
+
 ## Repo-Aligned Toolkit Entry Point
 
 Use this pattern when a repository-maintained toolkit exposes multiple entrypoints and a shared `lib/` package.
@@ -96,7 +110,8 @@ from __future__ import annotations
 import argparse
 from pathlib import Path
 
-from lib.shared import find_repo_root, log_info, render_json
+from lib.reporting import ExecutionReporter
+from lib.shared import find_repo_root, render_json
 
 
 def parse_args() -> argparse.Namespace:
@@ -108,12 +123,13 @@ def parse_args() -> argparse.Namespace:
 
 def main() -> int:
     args = parse_args()
+  reporter = ExecutionReporter()
     root = find_repo_root(Path(args.root))
     payload = {"root": root.as_posix()}
     if args.format == "json":
         print(render_json(payload))
     else:
-        log_info(f"Resolved repository root: {root.as_posix()}")
+    reporter.detail(f"Resolved repository root: {root.as_posix()}")
     return 0
 
 
diff --git a/.github/skills/internal-python-script/references/reporting.md b/.github/skills/internal-python-script/references/reporting.md
new file mode 100644
index 0000000..1c0df08
--- /dev/null
+++ b/.github/skills/internal-python-script/references/reporting.md
@@ -0,0 +1,114 @@
+# Python Script Reporting
+
+Use this reference when a Python script or operator-facing toolkit needs polished human-facing console reporting, `rich` rendering, status tables, redaction behavior, or a final summary.
+
+## Boundary
+
+- Keep reporting at the human-facing CLI or operator adapter boundary.
+- Keep application logic, reusable helpers, project modules, and machine-readable output paths free of `rich`, panels, tables, color, and emoji formatting.
+- Let application logic call semantic reporter methods such as `step()`, `success()`, or `summary()`; do not build styled strings inside business logic.
+- Reserve plain `print()` for machine-readable output boundaries such as `--format json`; human output should go through the reporter.
+
+## Dependency Decision
+
+Use `rich` when the human-facing terminal experience is part of the script contract. Keep the dependency decision close to the owning `requirements.txt`.
+
+```text
+Dependency decision note
+- Candidates: stdlib print/logging, rich
+- Final choice: rich
+- Why: the tool has operator-facing sections, status tables, warnings, and summaries where consistent terminal rendering reduces mistakes.
+```
+
+After adding or updating dependencies, regenerate exact pins and hashes with `pip-compile --generate-hashes` or the repository-approved equivalent, then validate with `pip install --require-hashes -r requirements.txt`.
+
+## Reporter Shape
+
+Preferred methods:
+
+- `banner(title, *, run_id, mode, scope, output_path, options)`
+- `section(title, description=None)`
+- `step(message)`
+- `detail(message)`
+- `success(message)`
+- `warning(message)`
+- `error(message)`
+- `table(title, columns, rows)`
+- `summary(status, counts, produced_files, diagnostics)`
+
+Use concise, deduplicated retry messages. Put technical details behind `--verbose` or `--debug`. Never log tokens, bearer values, passwords, secrets, credentials, or sensitive payloads.
+
+## Rich Skeleton
+
+```python
+from __future__ import annotations
+
+from collections.abc import Iterable, Mapping, Sequence
+from pathlib import Path
+
+from rich.console import Console
+from rich.markup import escape
+from rich.panel import Panel
+from rich.table import Table
+
+
+class ExecutionReporter:
+    def __init__(self, *, console: Console | None = None, verbose: bool = False) -> None:
+        self.console = console or Console()
+        self.verbose = verbose
+
+    def banner(
+        self,
+        title: str,
+        *,
+        run_id: str,
+        mode: str,
+        scope: str,
+        output_path: Path | None,
+        options: Mapping[str, object],
+    ) -> None:
+        lines = [
+            f"Run: {escape(run_id)}",
+            f"Mode: {escape(mode)}",
+            f"Scope: {escape(scope)}",
+        ]
+        if output_path is not None:
+            lines.append(f"Output: {escape(output_path.as_posix())}")
+        if options:
+            rendered = ", ".join(f"{escape(str(key))}={escape(str(value))}" for key, value in options.items())
+            lines.append(f"Options: {rendered}")
+        self.console.print(Panel("\n".join(lines), title=escape(title), border_style="blue"))
+
+    def section(self, title: str, description: str | None = None) -> None:
+        self.console.rule(f"ℹ️  {escape(title)}")
+        if description:
+            self.console.print(escape(description))
+
+    def step(self, message: str) -> None:
+        self.console.print(f"• {escape(message)}")
+
+    def detail(self, message: str) -> None:
+        if self.verbose:
+            self.console.print(f"ℹ️  {escape(message)}", style="dim")
+
+    def success(self, message: str) -> None:
+        self.console.print(f"✅ {escape(message)}", style="green")
+
+    def warning(self, message: str) -> None:
+        self.console.print(f"⚠️  {escape(message)}", style="yellow")
+
+    def error(self, message: str) -> None:
+        self.console.print(f"❌ {escape(message)}", style="red")
+
+    def table(self, title: str, columns: Sequence[str], rows: Iterable[Sequence[object]]) -> None:
+        table = Table(title=escape(title))
+        for column in columns:
+            table.add_column(escape(column))
+        for row in rows:
+            table.add_row(*(escape(str(value)) for value in row))
+        self.console.print(table)
+```
+
+## Summary Expectations
+
+End operator-facing runs with a compact summary that includes final status, produced files, relevant counts, diagnostics, and remaining gaps. Use tables for repeated file or diagnostic rows, and keep secrets redacted even in debug mode.
diff --git a/.github/skills/internal-python/SKILL.md b/.github/skills/internal-python/SKILL.md
index 33abeff..5ad9063 100644
--- a/.github/skills/internal-python/SKILL.md
+++ b/.github/skills/internal-python/SKILL.md
@@ -27,6 +27,8 @@ every Python edit; load them only when the task proves script or project depth.
 
 ## Baseline
 
+- Classify Python work by primary contract before choosing structure: direct operator execution belongs to `internal-python-script`; importable package, application, service, or framework behavior belongs to `internal-python-project`.
+- Do not classify by file count alone. A multi-module toolkit can remain script-owned when its main contract is CLI execution.
 - Prefer early returns, guard clauses, clear names, and readable control flow.
 - Add type hints on public or non-trivial function signatures.
 - Treat 300 lines as a review threshold for cohesive Python files.
@@ -38,6 +40,7 @@ every Python edit; load them only when the task proves script or project depth.
 - Add or update tests for testable logic.
 - Do not vendor libraries, wheelhouses, copied site-packages, or fallback dependency mirrors.
 - If external packages are introduced, keep exact pins and hashes in the owning requirements file.
+- Keep human-facing console reporting separate from reusable Python logging and machine-readable output. Script or CLI adapter boundaries may use `rich`; project/package internals and JSON-style output paths should stay neutral, structured, or plain data.
 
 ## Dependency And Runtime Depth
 
diff --git a/tests/test_repository_workflow_policy_contract.py b/tests/test_repository_workflow_policy_contract.py
index 6bc12b5..d9ed27d 100644
--- a/tests/test_repository_workflow_policy_contract.py
+++ b/tests/test_repository_workflow_policy_contract.py
@@ -269,6 +269,13 @@ def test_technology_skill_modularity_contract_is_owned_and_searchable() -> None:
     assert "project" in python_script_skill_text
     assert "generated" in python_script_skill_text
     assert "fixture" in python_script_skill_text
+    assert "## referenced skills" in python_script_skill_text
+    assert "internal-tdd" in python_script_skill_text
+    assert "references/reporting.md" in python_script_skill_text
+    assert "executionreporter" in python_script_skill_text
+    assert "rich" in python_script_skill_text
+    assert "test-first" in python_script_skill_text
+    assert "pip install --require-hashes" in python_script_skill_text
 
     assert "entrypoint" in bash_script_skill_text
     assert "sourced helper" in bash_script_skill_text
@@ -281,3 +288,12 @@ def test_technology_skill_modularity_contract_is_owned_and_searchable() -> None:
     assert "dto" in java_project_skill_text
 
     assert "domain/service/adapter" in python_project_skill_text
+    assert "## referenced skills" in python_project_skill_text
+    assert "internal-tdd" in python_project_skill_text
+    assert "structured" in python_project_skill_text
+    assert "rich" in python_project_skill_text
+    assert "cli adapter" in python_project_skill_text
+    assert "no emoji" in python_project_skill_text
+    assert "references/logging-and-reporting.md" in python_project_skill_text
+    assert "typed results" in python_project_skill_text
+    assert "machine-readable" in python_project_skill_text

From 86fcd46544a926a2af0497cd2e9db1469f26e47d Mon Sep 17 00:00:00 2001
From: Diego Mauricio Lagos <diego.lagosmorales@pagopa.it>
Date: Fri, 19 Jun 2026 14:25:47 +0200
Subject: [PATCH 05/11] feat: Refactor logging in anti-patterns and add Python
 fenced block extraction in tests

---
 .../references/anti-patterns-python.md        |  2 +-
 .../references/layout-and-templates.md        |  4 ++--
 ...est_repository_workflow_policy_contract.py | 20 +++++++++++++++++++
 3 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/.github/skills/internal-code-review/references/anti-patterns-python.md b/.github/skills/internal-code-review/references/anti-patterns-python.md
index 53df0c9..9b2473e 100644
--- a/.github/skills/internal-code-review/references/anti-patterns-python.md
+++ b/.github/skills/internal-code-review/references/anti-patterns-python.md
@@ -62,7 +62,7 @@ except:
 try:
     result = fetch_data()
 except requests.RequestException as exc:
-    logger.warning("⚠️ Fetch failed: %s", exc)
+    logger.warning("Fetch failed: %s", exc)
     raise
 ```
 
diff --git a/.github/skills/internal-python-script/references/layout-and-templates.md b/.github/skills/internal-python-script/references/layout-and-templates.md
index 79a89ee..efb812b 100644
--- a/.github/skills/internal-python-script/references/layout-and-templates.md
+++ b/.github/skills/internal-python-script/references/layout-and-templates.md
@@ -123,13 +123,13 @@ def parse_args() -> argparse.Namespace:
 
 def main() -> int:
     args = parse_args()
-  reporter = ExecutionReporter()
+    reporter = ExecutionReporter()
     root = find_repo_root(Path(args.root))
     payload = {"root": root.as_posix()}
     if args.format == "json":
         print(render_json(payload))
     else:
-    reporter.detail(f"Resolved repository root: {root.as_posix()}")
+        reporter.detail(f"Resolved repository root: {root.as_posix()}")
     return 0
 
 
diff --git a/tests/test_repository_workflow_policy_contract.py b/tests/test_repository_workflow_policy_contract.py
index d9ed27d..116bbf3 100644
--- a/tests/test_repository_workflow_policy_contract.py
+++ b/tests/test_repository_workflow_policy_contract.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import re
 from pathlib import Path
 
 import yaml
@@ -14,6 +15,10 @@ def section_between(body: str, heading: str) -> str:
     return section.split("\n## ", 1)[0]
 
 
+def python_fenced_blocks(body: str) -> list[str]:
+    return re.findall(r"```python\n(.*?)\n```", body, flags=re.DOTALL)
+
+
 def test_github_pr_skill_owns_pr_merge_and_terminal_state_guardrails() -> None:
     agents_text = read_text("AGENTS.md")
     copilot_text = read_text(".github/copilot-instructions.md")
@@ -297,3 +302,18 @@ def test_technology_skill_modularity_contract_is_owned_and_searchable() -> None:
     assert "references/logging-and-reporting.md" in python_project_skill_text
     assert "typed results" in python_project_skill_text
     assert "machine-readable" in python_project_skill_text
+
+
+def test_python_skill_template_snippets_are_copyable_python() -> None:
+    template_paths = (
+        ".github/skills/internal-python-project/references/logging-and-reporting.md",
+        ".github/skills/internal-python-script/references/layout-and-templates.md",
+        ".github/skills/internal-python-script/references/reporting.md",
+    )
+
+    for relative_path in template_paths:
+        blocks = python_fenced_blocks(read_text(relative_path))
+        assert blocks, f"{relative_path} should contain Python fenced snippets"
+
+        for index, block in enumerate(blocks, start=1):
+            compile(block, f"{relative_path}:python-block-{index}", "exec")

From e6be20464b0dc138a2f1ea1effc10ec1aaf8624b Mon Sep 17 00:00:00 2001
From: Diego Mauricio Lagos <diego.lagosmorales@pagopa.it>
Date: Fri, 19 Jun 2026 19:06:37 +0200
Subject: [PATCH 06/11] feat: Enhance internal gateway review documentation
 with detailed validation and transition guidelines

---
 .../skills/internal-gateway-review/SKILL.md   | 40 +++++++++++++++----
 1 file changed, 32 insertions(+), 8 deletions(-)

diff --git a/.github/skills/internal-gateway-review/SKILL.md b/.github/skills/internal-gateway-review/SKILL.md
index 426194a..eff1662 100644
--- a/.github/skills/internal-gateway-review/SKILL.md
+++ b/.github/skills/internal-gateway-review/SKILL.md
@@ -13,17 +13,40 @@ description: Use when repository-owned work needs same-conversation defect-first
 - `internal-gateway-writing-plans`
 - `internal-agent-support-next-step`
 
-Portable review orchestrator. This skill owns review scope, lens selection,
-findings consolidation, critical support, and remediation-plan transition. It
-does not apply fixes.
+Treat this section as an audit and routing index, not a preload bundle. Load a
+referenced skill only when the domain, finding, blocker, or phase requires it.
 
-Before any user-visible review verdict, run a counter-validation pass that
-challenges the draft analysis for missing evidence, false positives, severity
-inflation, route errors, and ignored contrary evidence. Revise or reopen the
-review before presenting the analysis when the critique exposes a material gap use `internal-gateway-critical-master`.
+Portable review orchestrator. Owns review scope, lens selection, findings
+consolidation, critical support, and remediation-plan transition. It does not
+apply fixes.
+
+Before any user-visible verdict, run a lightweight internal check for evidence,
+severity, false positives, contrary evidence, and scope narrowing. Load
+`internal-gateway-critical-master` only for a material challenge. Revise or
+reopen when the check exposes a material gap.
 
 See `references/review-gate.md` for the review output contract and gate states.
 
+## Token Discipline
+
+Inspect diff and failing evidence first; avoid broad repository scans unless an
+evidence gap requires one; never preload referenced skills; show at most 5
+material findings unless exhaustive review is requested; summarize omitted
+low-risk observations separately, not as findings.
+
+## Review To Plan Transition
+
+Before creating, accepting, or routing a remediation plan, keep the review
+defect-first and map every original material finding: `id`, `status` (`planned`,
+`deferred`, `rejected`, or `residual`), `reason`, `next owner`, and
+`validation expected`.
+
+If remediation steps cover less than 100% of material findings, label the
+output `partial remediation plan` and keep residual, deferred, or rejected
+findings visible. A retained mini-plan is a coverage-preserving handoff authored
+by `internal-gateway-writing-plans`; its job is plan creation, not fixes. This
+gateway does not choose the execution owner.
+
 ## When to use
 
 - The user asks for review of a concrete artifact, diff, workflow, or bundle.
@@ -35,5 +58,6 @@ See `references/review-gate.md` for the review output contract and gate states.
 - Review flow preserves compact context: prioritize diff and failing evidence first, then expand only when an evidence gap remains.
 - Review output carries findings, severity, confidence, evidence gap, counter-validation result, route or next owner, and a Review Gate outcome before the final verdict.
 - The review cannot present analysis to the user until counter-validation confirms it or reopens material gaps.
-- Retained remediation plans are authored by `internal-gateway-writing-plans`.
+- Remediation-plan transitions preserve a 100% material-finding coverage map or explicitly declare a `partial remediation plan`.
+- Retained remediation plans are authored by `internal-gateway-writing-plans` and preserve the coverage map.
 - The gateway stops before fixes.

From 4eb9afd8832f659d63c45b8025aa4872b3b52289 Mon Sep 17 00:00:00 2001
From: Diego Mauricio Lagos <diego.lagosmorales@pagopa.it>
Date: Fri, 19 Jun 2026 19:50:21 +0200
Subject: [PATCH 07/11] feat: Enhance Python guidelines with new configuration
 handling and formatting practices

---
 .github/instructions/internal-python.instructions.md | 3 +++
 .github/skills/internal-python-project/SKILL.md      | 5 +++++
 .github/skills/internal-python-script/SKILL.md       | 5 +++++
 .github/skills/internal-python/SKILL.md              | 4 ++++
 4 files changed, 17 insertions(+)

diff --git a/.github/instructions/internal-python.instructions.md b/.github/instructions/internal-python.instructions.md
index d94e990..3ef22e5 100644
--- a/.github/instructions/internal-python.instructions.md
+++ b/.github/instructions/internal-python.instructions.md
@@ -11,7 +11,10 @@ This file is optimized for Copilot code review and should produce only evidenced
 - Verify guard clauses and error handling make failure modes explicit.
 - Flag unsafe input handling, shell invocation, or filesystem side effects.
 - Check function and module boundaries for readability and cohesion.
+- Flag behavioral configuration buried in helpers, services, or library modules instead of centralized at the correct boundary: a script entrypoint, `Configuration` section, settings module, adapter, application factory, or composition root.
+- Do not flag stable domain invariants merely because they are constants near domain code.
 - Verify type hints and public interfaces stay consistent with call sites.
+- Flag manual formatting churn that fights the repository formatter; when Ruff is configured, prefer `ruff format` and Ruff diagnostics over subjective style edits.
 - Report dependency usage that is unpinned or unnecessary for the change.
 - Flag vendored libraries, wheelhouses, copied site-packages, or fallback dependency mirrors.
 - Flag new external dependencies that are missing hash-locked pins in the owning requirements file.
diff --git a/.github/skills/internal-python-project/SKILL.md b/.github/skills/internal-python-project/SKILL.md
index 509244e..03dfbe8 100644
--- a/.github/skills/internal-python-project/SKILL.md
+++ b/.github/skills/internal-python-project/SKILL.md
@@ -31,9 +31,13 @@ description: Use when creating or modifying Python package or application code w
 ## Compact Python baseline
 
 - Prefer early returns, guard clauses, clear names, and readable control flow.
+- Keep functions small enough to read without tracing hidden state. Prefer explicit inputs over module-level lookups inside reusable logic.
 - Add type hints on public or non-trivial function signatures.
 - Keep comments, docstrings, logs, exceptions, and CLI output in English.
 - Use the repository-declared runtime before falling back to ambient `python3`.
+- Centralize behavioral configuration instead of scattering magic values through services, adapters, or library modules. Put environment-specific and operator-tuned values in a settings module, application factory, CLI adapter, framework configuration, or composition root.
+- Pass configuration into reusable project code through typed settings, constructor arguments, or function parameters. Domain and service code should not read environment variables, files, or deployment defaults directly unless that boundary is its explicit responsibility.
+- Do not confuse domain invariants with configuration. Stable rules that belong to the domain may stay near the domain code; deployment-specific paths, endpoints, thresholds, defaults, and feature switches should live at the configuration boundary.
 - Do not vendor libraries, wheelhouses, copied site-packages, or fallback dependency mirrors.
 - If external packages are introduced, keep exact pins and hashes in the owning requirements file.
 
@@ -66,6 +70,7 @@ Load `references/logging-and-reporting.md` when project code needs a professiona
 - Follow the repository's existing framework before introducing FastAPI, Flask, Django, or a new dependency stack.
 - Use dataclasses or typed DTOs for internal contracts, and boundary-validation models where the framework already expects them.
 - Keep async flows end-to-end; do not mix blocking libraries into async request paths without an explicit bridge.
+- When Ruff is configured for the project, let `ruff format` own formatting and use `ruff check` before broader test runs. Avoid hand-formatting that creates churn against the configured formatter.
 
 ## Test-shape guidance
 
diff --git a/.github/skills/internal-python-script/SKILL.md b/.github/skills/internal-python-script/SKILL.md
index 678f3a5..509f942 100644
--- a/.github/skills/internal-python-script/SKILL.md
+++ b/.github/skills/internal-python-script/SKILL.md
@@ -32,6 +32,9 @@ description: Use when creating or modifying standalone Python scripts, CLIs, or
 
 - Standalone tools should default to a dedicated folder or toolkit root, not a loose top-level `.py` file.
 - Keep entrypoints thin: parse arguments, resolve paths, orchestrate helpers, and return an exit code through `main() -> int` plus `raise SystemExit(main())`.
+- Keep script-owned configuration visible at the entrypoint boundary. In single-file scripts, place a clearly named `Configuration` section near the end of the file, after helper definitions and before `main()` or `raise SystemExit(main())`.
+- Name configuration values by purpose, not by type: paths, file names, field lists, thresholds, defaults, mappings, filters, and output modes should explain what behavior they control.
+- Do not hide script-specific configuration inside helper modules or libraries. Helpers should accept explicit parameters or a small typed settings object when several values travel together.
 - Keep single-file scripts under 400 lines when possible. At 300 lines, review whether orchestration and helper boundaries stay clear; at 400 lines, split-or-justify is required.
 - Place shared helper logic in local helper modules, preferably under `utils/` when the toolkit structure supports that layout.
 - For operator-facing script work, crossing the 400-line threshold should move toward a toolkit or project structure according to the primary contract, not an ever-growing single entrypoint.
@@ -52,6 +55,7 @@ description: Use when creating or modifying standalone Python scripts, CLIs, or
 ## Compact Python baseline
 
 - Prefer early returns, guard clauses, clear names, and readable control flow.
+- Keep script-owned configuration at the entrypoint boundary with simple descriptive names.
 - Add type hints on public or non-trivial function signatures.
 - Keep comments, docstrings, logs, exceptions, and CLI output in English.
 - Use the repository-declared runtime before falling back to ambient `python3`.
@@ -95,6 +99,7 @@ Keep these rules visible while drafting:
 - Use coverage reports to inspect missing behavior on touched code, not to force blanket 100% coverage.
 - For bugfixes, features, and intentional behavior changes, start test-first through the public CLI or stable helper seam: add or update the failing test, confirm it fails for the intended reason, then implement the smallest fix.
 - For refactors, prose-only updates, generated fixtures, or mechanical formatting with no executable behavior change, run the existing focused tests plus `py_compile` or `compileall` instead of manufacturing speculative tests.
+- When Ruff is configured, run `ruff format` for formatting-only Python edits and `ruff check` for lint feedback before wider test runs.
 - Prefer existing repository commands such as `make lint`, `make test`, or a shared script runner before inventing a one-off validation path.
 
 ## Runtime guidance
diff --git a/.github/skills/internal-python/SKILL.md b/.github/skills/internal-python/SKILL.md
index 5ad9063..77711c8 100644
--- a/.github/skills/internal-python/SKILL.md
+++ b/.github/skills/internal-python/SKILL.md
@@ -30,12 +30,16 @@ every Python edit; load them only when the task proves script or project depth.
 - Classify Python work by primary contract before choosing structure: direct operator execution belongs to `internal-python-script`; importable package, application, service, or framework behavior belongs to `internal-python-project`.
 - Do not classify by file count alone. A multi-module toolkit can remain script-owned when its main contract is CLI execution.
 - Prefer early returns, guard clauses, clear names, and readable control flow.
+- Keep functions small enough to read without tracing hidden state. Prefer explicit inputs over module-level lookups inside reusable logic.
 - Add type hints on public or non-trivial function signatures.
 - Treat 300 lines as a review threshold for cohesive Python files.
 - Treat 400 lines as a split-or-justify threshold: split repeated decisions into focused modules or document why a single file remains clearer.
 - Apply pragmatic DRY: extract repeated decision logic, but do not force abstractions for one-off control flow.
+- Centralize behavioral configuration instead of scattering magic values through implementation code. Use clear names for paths, field lists, thresholds, defaults, mappings, feature switches, and external endpoint values.
+- Do not confuse domain invariants with configuration. Stable rules that belong to the domain may stay near the domain code; environment-specific or operator-tuned values belong at an entrypoint, settings module, adapter, or composition boundary.
 - Keep comments, docstrings, logs, exceptions, and CLI output in English.
 - Use the repository-declared runtime before falling back to ambient `python3`.
+- When Ruff is configured for the target repository, let `ruff format` own formatting and use Ruff diagnostics for import order and simple style cleanup. Do not create manual formatting churn that fights the configured formatter.
 - When a test must modify `sys.path` before importing a standalone script, keep the affected import after that setup and mark only that import with `# noqa: E402`; remove truly unused imports or variables instead of suppressing them.
 - Add or update tests for testable logic.
 - Do not vendor libraries, wheelhouses, copied site-packages, or fallback dependency mirrors.

From 7022f15d62f55815afde0269382e93ca499f3056 Mon Sep 17 00:00:00 2001
From: Diego Mauricio Lagos <diego.lagosmorales@pagopa.it>
Date: Sun, 21 Jun 2026 15:59:39 +0200
Subject: [PATCH 08/11] feat: Implement compact evidence reporting and
 structured data evidence budget across various skills

---
 .../internal-gateway-execute-plans/SKILL.md   |  6 +++
 .../SKILL.md                                  |  2 +
 .../skills/internal-gateway-review/SKILL.md   |  9 +++-
 .../references/review-gate.md                 |  1 +
 .../internal-gateway-simple-task/SKILL.md     | 27 +++++++++--
 .../references/plan-mode.md                   | 36 ++++++++++-----
 .../internal-gateway-writing-plans/SKILL.md   | 16 +++++--
 .../patches/openai-spreadsheet.patch          | 23 ++++++++++
 ...owers-verification-before-completion.patch | 17 +++++++
 .../references/imported-asset-overrides.yaml  | 28 ++++++++++++
 .../references/managed-resource-scope.md      | 15 +++++++
 .github/skills/openai-spreadsheet/SKILL.md    | 13 ++++++
 .../SKILL.md                                  |  7 +++
 .../test_imported_asset_overrides_contract.py |  2 +
 tests/test_token_budget_skill_contract.py     | 45 +++++++++++++++++++
 15 files changed, 227 insertions(+), 20 deletions(-)
 create mode 100644 .github/skills/local-agent-sync-external-resources/patches/openai-spreadsheet.patch
 create mode 100644 .github/skills/local-agent-sync-external-resources/patches/superpowers-verification-before-completion.patch
 create mode 100644 tests/test_token_budget_skill_contract.py

diff --git a/.github/skills/internal-gateway-execute-plans/SKILL.md b/.github/skills/internal-gateway-execute-plans/SKILL.md
index 72d5217..1f27227 100644
--- a/.github/skills/internal-gateway-execute-plans/SKILL.md
+++ b/.github/skills/internal-gateway-execute-plans/SKILL.md
@@ -50,6 +50,7 @@ consumes approved `compact` and `extended` retained plans.
 - Reject `compact` folders outside the `mini-plan-*` convention.
 - Ignore `questions.md` during execution.
 - Maintain a compact execution state and prefer targeted rereads over full file re-ingestion unless new evidence invalidates current state.
+- Use `Compact Evidence Reporting` for large validator output: read enough output to decide the state honestly, then retain command, exit code, material counts, header or schema checks, changed files, and exact gaps instead of pasting raw logs.
 - Infer the execution strategy from `Plan profile`, folder shape, merged control-contract sections in `02-control.md` when applicable, and the validation path. Do not require a separate retained-plan consumer field.
 - Audit only mandatory requirements that are applicable; do not convert specialist rules into universal policy.
 - Use `superpowers-verification-before-completion` as the fresh-evidence owner; do not duplicate its mechanics.
@@ -71,6 +72,10 @@ scope, anti-scope, and validation path, then iterate:
 5. Continue only while evidence improves and no stop condition fires.
 6. Stop with `DONE`, a blocker, or an explicit evidence gap.
 
+Apply `Compact Evidence Reporting` after each focused validation: preserve the
+exact gap and proof path, but keep large outputs summarized unless the raw
+output is itself the missing evidence.
+
 Stop on scope drift, destructive action, owner conflict, missing validation
 path, human approval need, secret exposure risk, or repeated non-improving
 failures.
@@ -195,6 +200,7 @@ Before declaring any closeout step complete:
 - Hiding ownership conflicts instead of escalating a next owner and validation path.
 - Continuing the Agentic Execution Loop after evidence stops improving or a
   stop condition fires.
+- Pasting raw large validator output when compact evidence would preserve the same proof.
 - Packaging `DONE` while evidence gaps still require `APPLIED_UNVERIFIED`, `PARTIAL`, or `BLOCKED`.
 - Declaring a non-`DONE` state without writing or updating the `<STATE>-plan-state.md` marker.
 - Leaving stale `<STATE>-plan-state.md` markers behind after a state transition.
diff --git a/.github/skills/internal-gateway-idea-brainstorming/SKILL.md b/.github/skills/internal-gateway-idea-brainstorming/SKILL.md
index d1b424f..9f04ddf 100644
--- a/.github/skills/internal-gateway-idea-brainstorming/SKILL.md
+++ b/.github/skills/internal-gateway-idea-brainstorming/SKILL.md
@@ -35,6 +35,7 @@ through retained-plan creation. It stops before execution.
 - Same-conversation support-skill loading is not a lane change.
 - Idea Gate 0 remains mandatory.
 - Start with a bounded evidence pass ordered by risk. Read only the smallest local owner evidence needed to classify the request before asking questions.
+- For large tabular files, generated reports, or long log exports, keep the bounded evidence pass aggregate-first: collect file sizes, schema or headers, counts, anomalies, and targeted slices before any deeper read.
 - When authoritative platform semantics control feasibility or ownership, verify them early in the bounded evidence pass.
 - This gateway is not a specialized execution owner. A concrete task may not be accepted for execution here until Idea Gate 0 is `grill-me satisfied` and `Critical Gate 2` is `confident`.
 - For a direct concrete operation, emit `Specialization Checkpoint: gated`, explain that this owner cannot decide task ownership or execute yet, and continue with the bounded evidence pass plus mandatory `grill-me`.
@@ -59,6 +60,7 @@ State rules:
 - If the incoming request is already concrete (file edit, command execution, validator run, or implementation step), start with `Specialization Checkpoint: gated` before Idea Gate 0.
 - At `Specialization Checkpoint: gated`, name the recommended specialized owner (`internal-gateway-simple-task` by default, `internal-gateway-review` for defect-first review, `internal-gateway-critical-master` for pressure testing), but do not ask the user to keep this owner yet.
 - Continue through the bounded evidence pass, mandatory `grill-me`, and critical gate before asking whether this owner should stay in charge of the task.
+- Before the initial numbered block, keep large-file and large-log evidence compact: summarize counts, headers, anomalies, routes, and open gaps unless raw content is itself the missing evidence.
 - After the evidence pass, load `grill-me` and ask one mandatory numbered bulk question block with recommendations and defaults.
 - Before the initial numbered block, emit a compact facts/options summary derived from the bounded evidence pass.
 - Ask further focused numbered bulk blocks only for unresolved, dependent, or reopened branches.
diff --git a/.github/skills/internal-gateway-review/SKILL.md b/.github/skills/internal-gateway-review/SKILL.md
index eff1662..c6bea3b 100644
--- a/.github/skills/internal-gateway-review/SKILL.md
+++ b/.github/skills/internal-gateway-review/SKILL.md
@@ -17,8 +17,7 @@ Treat this section as an audit and routing index, not a preload bundle. Load a
 referenced skill only when the domain, finding, blocker, or phase requires it.
 
 Portable review orchestrator. Owns review scope, lens selection, findings
-consolidation, critical support, and remediation-plan transition. It does not
-apply fixes.
+consolidation, critical support, and remediation-plan transition. It does not apply fixes.
 
 Before any user-visible verdict, run a lightweight internal check for evidence,
 severity, false positives, contrary evidence, and scope narrowing. Load
@@ -34,6 +33,11 @@ evidence gap requires one; never preload referenced skills; show at most 5
 material findings unless exhaustive review is requested; summarize omitted
 low-risk observations separately, not as findings.
 
+Use `Compact Evidence Reporting` for large diffs, generated files, tabular
+exports, and logs: keep findings defect-first, cite the smallest excerpt or
+file point that proves impact, and avoid dumping large raw blocks when a
+targeted excerpt plus evidence path preserves the same proof.
+
 ## Review To Plan Transition
 
 Before creating, accepting, or routing a remediation plan, keep the review
@@ -56,6 +60,7 @@ gateway does not choose the execution owner.
 
 - Findings stay defect-first.
 - Review flow preserves compact context: prioritize diff and failing evidence first, then expand only when an evidence gap remains.
+- Large evidence may be reported compactly, but each material finding still keeps severity, confidence, evidence gap, counter-validation result, and route or next owner.
 - Review output carries findings, severity, confidence, evidence gap, counter-validation result, route or next owner, and a Review Gate outcome before the final verdict.
 - The review cannot present analysis to the user until counter-validation confirms it or reopens material gaps.
 - Remediation-plan transitions preserve a 100% material-finding coverage map or explicitly declare a `partial remediation plan`.
diff --git a/.github/skills/internal-gateway-review/references/review-gate.md b/.github/skills/internal-gateway-review/references/review-gate.md
index cb6f17d..5c0c4f7 100644
--- a/.github/skills/internal-gateway-review/references/review-gate.md
+++ b/.github/skills/internal-gateway-review/references/review-gate.md
@@ -21,5 +21,6 @@ Use this reference when `internal-gateway-review` needs to package findings befo
 
 - Keep the gate visible before any fixes.
 - Run counter-validation before the final user-visible verdict; challenge each finding for evidence, severity, route, and contrary proof.
+- For large diffs, generated files, logs, or tabular exports, keep evidence compact: cite the smallest excerpt or path that proves the finding and summarize omitted raw volume.
 - Report only material self-critique results: corrections, confidence changes, evidence gaps, or confirmation that no material issue was found.
 - Use the gate to route each actionable finding to the smallest next owner.
diff --git a/.github/skills/internal-gateway-simple-task/SKILL.md b/.github/skills/internal-gateway-simple-task/SKILL.md
index cb0f4e5..8fc1d08 100644
--- a/.github/skills/internal-gateway-simple-task/SKILL.md
+++ b/.github/skills/internal-gateway-simple-task/SKILL.md
@@ -70,6 +70,21 @@ Classify every simple task before operational work as `full-gate`,
 - If planning, review, critical pressure, or multi-phase validation becomes the
   real job, `escalate`.
 
+### Token Budget Gate
+
+- Run a `Token Budget Gate` before choosing `trivial-skip` or `plan-mode` when
+  the user asks for low-token execution or the task centers on large tabular
+  files, log exports, repeated tool output, or broad file changes.
+- For Copilot or debug log analysis, start with file size, model-call counts,
+  prompt or token aggregates, tool-span counts, result-size summaries, and
+  targeted slices; avoid full JSON dumps or prompt bodies unless they are the
+  missing evidence.
+- Keep `trivial-skip` only for truly tiny local work with obvious validation
+  and no material completeness risk.
+- If context pressure could hide required validation, data integrity, or route
+  ownership, prefer `plan-mode` and apply the `Plan Profile Selection Guard`
+  before proposing `compact`.
+
 ## Simple Procedure
 
 1. Inspect local files first.
@@ -147,9 +162,15 @@ the output shape changes.
 ### Profile
 
 - Default to `compact` (`tmp/superpowers/mini-plan-*`) with
-  `01-change-summary.md` and `02-execution.md`.
-- Use `extended` only when the task needs multi-slice execution, multiple
-  independent validators, an articulated anti-scope, or external pins.
+  `01-change-summary.md` and `02-execution.md`, but only when the task stays
+  within one owner, one execution lane, one primary validation path, and low
+  completeness risk.
+- Apply the `Plan Profile Selection Guard` before proposing `compact`.
+- Use `extended` when the task needs multi-slice execution, multiple
+  independent validators, an articulated anti-scope, or external pins, or
+  when it involves cross-skill token-discipline work, validator-impacting
+  changes, or exports, generated reports, and datasets that need non-trivial
+  reconciliation.
 
 ### Procedure
 
diff --git a/.github/skills/internal-gateway-simple-task/references/plan-mode.md b/.github/skills/internal-gateway-simple-task/references/plan-mode.md
index df9ab53..a60fc09 100644
--- a/.github/skills/internal-gateway-simple-task/references/plan-mode.md
+++ b/.github/skills/internal-gateway-simple-task/references/plan-mode.md
@@ -39,31 +39,45 @@ signal and ask for explicit confirmation before switching to plan mode.
   provisioning).
 - There is material risk that context pressure or chat limits will interrupt
   the work before it can be validated.
+- The task centers on large `.csv`, `.tsv`, `.xlsx`, JSON log exports,
+  repeated tool output, or broad file changes that would bloat chat context.
 - The user is asking for a large refactor, migration, or cross-file mechanical
   change that is safer as a tracked plan.
 
 Do not switch to plan mode implicitly without declaring the detected signals
 and asking for user confirmation.
 
-## Profile selection
+### Token Budget Gate
+
+When the cost signals come mainly from context pressure instead of task count,
+prefer a compact evidence posture rather than a raw-output posture. Keep same-chat
+execution only for tiny local work; otherwise switch to `plan-mode` and let the
+profile guard choose whether `compact` is still safe.
 
-- **Default `compact`**: use for a single owner, concrete target, one primary
-  validation path, and one execution lane. Folder name follows
-  `tmp/superpowers/mini-plan-*` and contains `01-change-summary.md` and
-  `02-execution.md`.
-- **Use `extended` only when**: the task needs multi-slice execution, several
-  independent validators, an articulated anti-scope, or external pins that must
-  be tracked in a control file.
+## Profile selection
 
-When in doubt, prefer `compact`. A simple task that needs a plan usually does
-not need the overhead of an extended plan.
+- **Default `compact`**: use only when the task stays within a single owner,
+  concrete target, one primary validation path, one execution lane, and low
+  completeness risk. Folder name follows `tmp/superpowers/mini-plan-*` and
+  contains `01-change-summary.md` and `02-execution.md`.
+- **Plan Profile Selection Guard**: escalate to `extended` when context or
+  completeness risk is material, especially for cross-skill token-discipline
+  work, validator-impacting changes, exports or generated reports, datasets
+  that need non-trivial reconciliation, several independent validators, an
+  articulated anti-scope, or external pins that must be tracked in a control
+  file.
+
+When profile safety is in doubt, prefer `extended` and state why. Prefer
+`compact` only when the plan can record the contrary evidence that keeps
+lower-context execution safe.
 
 ## Confirmation rule for implicit triggers
 
 For implicit cost-signal triggers, emit a short statement that:
 
 1. Names the detected cost signals.
-2. Proposes `plan-mode` with a default `compact` profile.
+2. Proposes `plan-mode` with the safest profile suggested by the signals,
+   defaulting to `compact` only when the profile guard stays clear.
 3. Asks the user to confirm, decline, or choose `extended`.
 
 Do not write the retained plan until the user confirms.
diff --git a/.github/skills/internal-gateway-writing-plans/SKILL.md b/.github/skills/internal-gateway-writing-plans/SKILL.md
index e7a6a4e..4063ff2 100644
--- a/.github/skills/internal-gateway-writing-plans/SKILL.md
+++ b/.github/skills/internal-gateway-writing-plans/SKILL.md
@@ -46,15 +46,23 @@ New `compact` plans should use `tmp/superpowers/mini-plan-*`.
 | `compact` | Single owner, concrete target, one validation path, low-to-medium risk, and one execution lane. Best fit for small/fast executors after positive handoff validation. | `01-change-summary.md`, `02-execution.md` |
 | `extended` | Cross-family changes, higher risk, lower-context execution, multiple validators, or multi-slice execution state. Soft-limit profile: use judgment-based size review with completeness over compression, explicit control files, and deterministic read order. | `01-change-summary.md`, `02-control.md`, `03-execution.md`, additional numbered files by category (`04-...`). |
 
-Escalate to `extended` when completeness risk is material: exports, reports, or
-datasets with non-trivial reconciliation; external API contracts
-(credentials, pagination, retries, schema pinning); executive-facing output;
-multiple validators; or synced always-on guidance edits.
+### Plan Profile Selection Guard
+
+Escalate to `extended` when completeness or context-discipline risk is material:
+cross-skill token-discipline changes; exports, generated reports, or datasets
+with non-trivial reconciliation; validator-impacting contract changes;
+external API contracts (credentials, pagination, retries, schema pinning);
+executive-facing output; multiple validators; or synced always-on guidance
+edits.
 
 Do not use `compact` when the executor needs exact sources, target files,
 validators, blockers, or external pins that only `02-control.md`
 can provide.
 
+If `compact` is still chosen near one of those edges, the plan must record the
+contrary evidence that keeps one owner, one execution lane, and one validation
+path sufficient despite lower-context execution.
+
 ## Explicit Constraints
 
 - Create retained plans under `tmp/superpowers/<clear-action-or-task-name>/`.
diff --git a/.github/skills/local-agent-sync-external-resources/patches/openai-spreadsheet.patch b/.github/skills/local-agent-sync-external-resources/patches/openai-spreadsheet.patch
new file mode 100644
index 0000000..49bda19
--- /dev/null
+++ b/.github/skills/local-agent-sync-external-resources/patches/openai-spreadsheet.patch
@@ -0,0 +1,23 @@
+diff --git a/.github/skills/openai-spreadsheet/SKILL.md b/.github/skills/openai-spreadsheet/SKILL.md
+--- a/.github/skills/openai-spreadsheet/SKILL.md
++++ b/.github/skills/openai-spreadsheet/SKILL.md
+@@ -33,6 +33,19 @@ IMPORTANT: System and user instructions always take precedence.
+ - Use `openpyxl.chart` for native Excel charts when needed.
+ - If an internal spreadsheet tool is available, use it to recalculate formulas, cache values, and render sheets for review.
+
++## Structured Data Evidence Budget
++- For large `.xlsx`, `.csv`, and `.tsv` work, keep user-facing evidence compact:
++  report schema or headers, row counts, column counts, targeted anomalies,
++  checksums or hashes when useful, sampled examples, and validation gaps.
++- Start discovery with headers plus a small sample, then move to deterministic
++  full-file checks when correctness depends on the whole dataset.
++- Sampling does not replace full-file validation for transforms, merges,
++  source-link checks, empty-row checks, column moves, stable ID generation,
++  duplicate ID detection, or reconciliation.
++- Preserve source links, formulas, formatting where applicable, empty-row
++  anomalies, duplicate IDs, stable generated IDs, and missing column data as
++  material integrity checks.
++
+ ## Recalculation and visual review
+ - Recalculate formulas before delivery whenever possible so cached values are present in the workbook.
+ - Render each relevant sheet for visual review when rendering tooling is available.
diff --git a/.github/skills/local-agent-sync-external-resources/patches/superpowers-verification-before-completion.patch b/.github/skills/local-agent-sync-external-resources/patches/superpowers-verification-before-completion.patch
new file mode 100644
index 0000000..36a2a25
--- /dev/null
+++ b/.github/skills/local-agent-sync-external-resources/patches/superpowers-verification-before-completion.patch
@@ -0,0 +1,17 @@
+diff --git a/.github/skills/superpowers-verification-before-completion/SKILL.md b/.github/skills/superpowers-verification-before-completion/SKILL.md
+--- a/.github/skills/superpowers-verification-before-completion/SKILL.md
++++ b/.github/skills/superpowers-verification-before-completion/SKILL.md
+@@ -37,6 +37,13 @@ BEFORE claiming any status or expressing satisfaction:
+ Skip any step = lying, not verifying
+ ```
+
++## Compact Evidence Reporting
++
++Read enough full output to judge the claim honestly. When the output is long,
++report the command, exit code, material counts, failing checks, header or
++schema checks, changed files, and the exact gap instead of pasting raw logs.
++Keep raw output only when the raw text itself is the disputed evidence.
++
+ ## Common Failures
+
+ | Claim | Requires | Not Sufficient |
diff --git a/.github/skills/local-agent-sync-external-resources/references/imported-asset-overrides.yaml b/.github/skills/local-agent-sync-external-resources/references/imported-asset-overrides.yaml
index a2fa62e..e0f20d0 100644
--- a/.github/skills/local-agent-sync-external-resources/references/imported-asset-overrides.yaml
+++ b/.github/skills/local-agent-sync-external-resources/references/imported-asset-overrides.yaml
@@ -96,3 +96,31 @@ overrides:
     expected_content_hash: 32bb78a434c38bbc2af1eea970236ba989270eb1b631e8d4f2d8928a0a125d62
     baseline_repo_commit: "e74f006"
     validation_note: Stop the refresh if the patch does not apply cleanly; review whether an internal wrapper should replace the override.
+  - id: openai-spreadsheet-structured-data-evidence-budget
+    target_path: .github/skills/openai-spreadsheet/SKILL.md
+    source_family: openai/skills
+    lifecycle_mode: post-refresh-patch
+    apply_strategy: git-apply-3way
+    approval: explicit-user-counter-validated
+    reason: >-
+      Preserve the repository-specific structured-data evidence budget so CSV,
+      TSV, and XLSX work stays low-token without weakening full-file integrity
+      checks after an imported office-skill refresh.
+    patch_path: patches/openai-spreadsheet.patch
+    expected_content_hash: f42e8ea1128c448a7ccaa42b2ca39540c5b9109a7e6e6ee4abf4e3e5e4a80722
+    baseline_repo_commit: "e6afb0d"
+    validation_note: Stop the refresh if the patch does not apply cleanly; review whether an internal wrapper should replace the override.
+  - id: superpowers-verification-before-completion-compact-evidence-reporting
+    target_path: .github/skills/superpowers-verification-before-completion/SKILL.md
+    source_family: obra/superpowers
+    lifecycle_mode: post-refresh-patch
+    apply_strategy: git-apply-3way
+    approval: explicit-user-counter-validated
+    reason: >-
+      Preserve the repository-specific compact evidence reporting rule so that
+      long verification output is summarized safely without weakening the
+      imported evidence-before-claims contract.
+    patch_path: patches/superpowers-verification-before-completion.patch
+    expected_content_hash: 222675dbdc57434ae238b2301bdc2258eebe76f172ca3fbb3e3d97a2117bac79
+    baseline_repo_commit: "f2cbfbe"
+    validation_note: Stop the refresh if the patch does not apply cleanly; review whether an internal wrapper should replace the override.
diff --git a/.github/skills/local-agent-sync-external-resources/references/managed-resource-scope.md b/.github/skills/local-agent-sync-external-resources/references/managed-resource-scope.md
index 2175221..0fbafdd 100644
--- a/.github/skills/local-agent-sync-external-resources/references/managed-resource-scope.md
+++ b/.github/skills/local-agent-sync-external-resources/references/managed-resource-scope.md
@@ -76,6 +76,14 @@ Managed skills:
   `superpowers-verification-before-completion`; `writing-plans` ->
   `superpowers-writing-plans`.
 
+Approved in-place overrides:
+
+- `superpowers-verification-before-completion`: replay
+  `superpowers-verification-before-completion-compact-evidence-reporting`
+  after each refresh so that long verification output is summarized by
+  command, exit code, counts, schema checks, changed files, and exact gaps
+  without weakening the upstream evidence-before-claims rule.
+
 ### `hashicorp/agent-skills`
 
 Source:
@@ -145,6 +153,13 @@ Retained support-only office skills:
 - `doc` -> `openai-docx`; `spreadsheet` -> `openai-spreadsheet`; `slides` ->
   `openai-slides`.
 
+Approved in-place overrides:
+
+- `openai-spreadsheet`: replay
+  `openai-spreadsheet-structured-data-evidence-budget` after each refresh so
+  large spreadsheet and tabular workflows keep the repository-specific
+  structured-data evidence budget while preserving full-file correctness checks.
+
 ### `sickn33/antigravity-awesome-skills`
 
 Source:
diff --git a/.github/skills/openai-spreadsheet/SKILL.md b/.github/skills/openai-spreadsheet/SKILL.md
index 2e393dc..2236908 100644
--- a/.github/skills/openai-spreadsheet/SKILL.md
+++ b/.github/skills/openai-spreadsheet/SKILL.md
@@ -33,6 +33,19 @@ IMPORTANT: System and user instructions always take precedence.
 - Use `openpyxl.chart` for native Excel charts when needed.
 - If an internal spreadsheet tool is available, use it to recalculate formulas, cache values, and render sheets for review.
 
+## Structured Data Evidence Budget
+- For large `.xlsx`, `.csv`, and `.tsv` work, keep user-facing evidence compact:
+  report schema or headers, row counts, column counts, targeted anomalies,
+  checksums or hashes when useful, sampled examples, and validation gaps.
+- Start discovery with headers plus a small sample, then move to deterministic
+  full-file checks when correctness depends on the whole dataset.
+- Sampling does not replace full-file validation for transforms, merges,
+  source-link checks, empty-row checks, column moves, stable ID generation,
+  duplicate ID detection, or reconciliation.
+- Preserve source links, formulas, formatting where applicable, empty-row
+  anomalies, duplicate IDs, stable generated IDs, and missing column data as
+  material integrity checks.
+
 ## Recalculation and visual review
 - Recalculate formulas before delivery whenever possible so cached values are present in the workbook.
 - Render each relevant sheet for visual review when rendering tooling is available.
diff --git a/.github/skills/superpowers-verification-before-completion/SKILL.md b/.github/skills/superpowers-verification-before-completion/SKILL.md
index b60f91d..035e583 100644
--- a/.github/skills/superpowers-verification-before-completion/SKILL.md
+++ b/.github/skills/superpowers-verification-before-completion/SKILL.md
@@ -37,6 +37,13 @@ BEFORE claiming any status or expressing satisfaction:
 Skip any step = lying, not verifying
 ```
 
+## Compact Evidence Reporting
+
+Read enough full output to judge the claim honestly. When the output is long,
+report the command, exit code, material counts, failing checks, header or
+schema checks, changed files, and the exact gap instead of pasting raw logs.
+Keep raw output only when the raw text itself is the disputed evidence.
+
 ## Common Failures
 
 | Claim | Requires | Not Sufficient |
diff --git a/tests/test_imported_asset_overrides_contract.py b/tests/test_imported_asset_overrides_contract.py
index 90518bd..d834e49 100644
--- a/tests/test_imported_asset_overrides_contract.py
+++ b/tests/test_imported_asset_overrides_contract.py
@@ -32,6 +32,8 @@ def test_imported_asset_override_registry_tracks_expected_imported_targets() ->
         ".github/skills/superpowers-subagent-driven-development/SKILL.md",
         ".github/skills/superpowers-requesting-code-review/SKILL.md",
         ".github/skills/grill-me/SKILL.md",
+        ".github/skills/openai-spreadsheet/SKILL.md",
+        ".github/skills/superpowers-verification-before-completion/SKILL.md",
     }
     assert all(
         entry["approval"] == "explicit-user-counter-validated" for entry in overrides
diff --git a/tests/test_token_budget_skill_contract.py b/tests/test_token_budget_skill_contract.py
new file mode 100644
index 0000000..21b76d0
--- /dev/null
+++ b/tests/test_token_budget_skill_contract.py
@@ -0,0 +1,45 @@
+from __future__ import annotations
+
+from pathlib import Path
+
+
+def read_text(relative_path: str) -> str:
+    return Path(relative_path).read_text(encoding="utf-8")
+
+
+def test_token_budget_guardrails_are_distributed_by_owner() -> None:
+    simple_text = read_text(".github/skills/internal-gateway-simple-task/SKILL.md")
+    plan_mode_text = read_text(
+        ".github/skills/internal-gateway-simple-task/references/plan-mode.md"
+    )
+    idea_text = read_text(".github/skills/internal-gateway-idea-brainstorming/SKILL.md")
+    execute_text = read_text(".github/skills/internal-gateway-execute-plans/SKILL.md")
+    review_text = read_text(".github/skills/internal-gateway-review/SKILL.md")
+    verification_text = read_text(
+        ".github/skills/superpowers-verification-before-completion/SKILL.md"
+    )
+    spreadsheet_text = read_text(".github/skills/openai-spreadsheet/SKILL.md")
+
+    assert "Token Budget Gate" in simple_text
+    assert "Copilot or debug log analysis" in simple_text
+    assert "Token Budget Gate" in plan_mode_text
+    assert "aggregate-first" in idea_text
+    assert "Compact Evidence Reporting" in execute_text
+    assert "Compact Evidence Reporting" in review_text
+    assert "Compact Evidence Reporting" in verification_text
+    assert "Structured Data Evidence Budget" in spreadsheet_text
+
+
+def test_structured_data_guardrails_preserve_full_file_correctness() -> None:
+    spreadsheet_text = read_text(".github/skills/openai-spreadsheet/SKILL.md")
+    lowered = spreadsheet_text.lower()
+
+    assert ".xlsx" in spreadsheet_text
+    assert ".csv" in spreadsheet_text
+    assert ".tsv" in spreadsheet_text
+    assert "row counts" in lowered
+    assert "column counts" in lowered
+    assert "full-file" in lowered
+    assert "sampling does not replace full-file validation" in lowered
+    assert "source links" in lowered
+    assert "duplicate ids" in lowered

From 790eb839e0654844a9356829de2a8ce5328d5941 Mon Sep 17 00:00:00 2001
From: Diego Mauricio Lagos <diego.lagosmorales@pagopa.it>
Date: Sun, 21 Jun 2026 16:59:52 +0200
Subject: [PATCH 09/11] feat: Add local Copilot log analyzer skill and update
 related documentation

---
 .github/INVENTORY.md                          |  1 +
 .../internal-gateway-simple-task/SKILL.md     | 12 ++++
 .../references/plan-mode.md                   | 10 ++++
 .../local-copilot-log-analizer/SKILL.md       | 60 +++++++++++++++++++
 .../agents/openai.yaml                        |  4 ++
 tests/test_token_budget_skill_contract.py     | 13 ++++
 tests/test_validation_entrypoints_contract.py | 17 ++++++
 7 files changed, 117 insertions(+)
 create mode 100644 .github/skills/local-copilot-log-analizer/SKILL.md
 create mode 100644 .github/skills/local-copilot-log-analizer/agents/openai.yaml

diff --git a/.github/INVENTORY.md b/.github/INVENTORY.md
index 2d23b13..088f375 100644
--- a/.github/INVENTORY.md
+++ b/.github/INVENTORY.md
@@ -123,6 +123,7 @@ This file is the exact path inventory for the live GitHub Copilot catalog in thi
 - `.github/skills/local-agent-sync-external-resources/SKILL.md`
 - `.github/skills/local-agent-sync-global-copilot-configs-into-repo/SKILL.md`
 - `.github/skills/local-agent-sync-install-ai-resources/SKILL.md`
+- `.github/skills/local-copilot-log-analizer/SKILL.md`
 - `.github/skills/mattpocock-caveman/SKILL.md`
 - `.github/skills/openai-docx/SKILL.md`
 - `.github/skills/openai-gh-address-comments/SKILL.md`
diff --git a/.github/skills/internal-gateway-simple-task/SKILL.md b/.github/skills/internal-gateway-simple-task/SKILL.md
index 8fc1d08..dc8c287 100644
--- a/.github/skills/internal-gateway-simple-task/SKILL.md
+++ b/.github/skills/internal-gateway-simple-task/SKILL.md
@@ -79,6 +79,18 @@ Classify every simple task before operational work as `full-gate`,
   prompt or token aggregates, tool-span counts, result-size summaries, and
   targeted slices; avoid full JSON dumps or prompt bodies unless they are the
   missing evidence.
+- Keep compact reporting runner-agnostic: ask for bounded summaries, exit
+  state, counts, anomalies, and evidence gaps, but do not require `jq`, `awk`,
+  shell flags, or terminal-only recipes unless they are already the local
+  workflow being analyzed.
+- A cost checkpoint pauses before a new expensive tool burst, broad reread, or
+  repeated execution loop. It does not interrupt ordinary conversation,
+  grill-me questioning, or collaborative reasoning when no expensive tool
+  action is being launched.
+- If the user explicitly asks for full output, deeper slices, or continued
+  execution, name the likely token or context impact before expanding and then
+  either proceed with the smallest bounded next slice or ask for confirmation
+  before the new expensive burst.
 - Keep `trivial-skip` only for truly tiny local work with obvious validation
   and no material completeness risk.
 - If context pressure could hide required validation, data integrity, or route
diff --git a/.github/skills/internal-gateway-simple-task/references/plan-mode.md b/.github/skills/internal-gateway-simple-task/references/plan-mode.md
index a60fc09..0fa6fcd 100644
--- a/.github/skills/internal-gateway-simple-task/references/plan-mode.md
+++ b/.github/skills/internal-gateway-simple-task/references/plan-mode.md
@@ -54,6 +54,16 @@ prefer a compact evidence posture rather than a raw-output posture. Keep same-ch
 execution only for tiny local work; otherwise switch to `plan-mode` and let the
 profile guard choose whether `compact` is still safe.
 
+A cost checkpoint pauses before a new expensive tool burst, broad reread, or
+multi-step execution loop. It does not interrupt ordinary conversation,
+grill-me analysis, or collaborative study when no expensive tool action is
+starting.
+
+When the user explicitly asks for broader output, deeper analysis, or continued
+execution, name the likely token or context impact first and then either
+continue with the smallest bounded next slice or ask for confirmation before
+the new expensive burst.
+
 ## Profile selection
 
 - **Default `compact`**: use only when the task stays within a single owner,
diff --git a/.github/skills/local-copilot-log-analizer/SKILL.md b/.github/skills/local-copilot-log-analizer/SKILL.md
new file mode 100644
index 0000000..7312a73
--- /dev/null
+++ b/.github/skills/local-copilot-log-analizer/SKILL.md
@@ -0,0 +1,60 @@
+---
+name: local-copilot-log-analizer
+description: Use when analyzing GitHub Copilot Chat debug logs or prompt exports in this repository, especially for low-token diagnosis of token usage, model calls, tool spans, or oversized results.
+---
+
+# Local Copilot Log Analizer
+
+## Referenced skills
+
+- None.
+
+Repository-owned workflow owner for low-token analysis of GitHub Copilot Chat
+debug logs and prompt exports in this repository. Route analysis through the
+canonical `tools/analyze_copilot_debug_log` wrapper instead of recreating parser
+or aggregation logic in ad-hoc scripts or one-off shell pipelines.
+
+## When to use
+
+- The user wants to inspect Copilot Chat debug logs, prompt exports, token
+  usage, model-call counts, tool spans, or oversized result payloads.
+- The evidence is already in local files and the goal is to extract bounded
+  aggregates before drilling into raw records.
+- The task needs a repository-owned workflow for Copilot log analysis rather
+  than a change to the analyzer implementation.
+
+## When not to use
+
+- The request is about changing the analyzer implementation itself; edit
+  `tools/analyze_copilot_debug_log/` directly.
+- The user already asked for a full raw dump and explicitly accepted the extra
+  token and context cost.
+- The task cannot be grounded in local debug-log or prompt-export files.
+
+## Workflow
+
+1. Confirm the input kind and path first: `prompt-exports` or `debug-logs`.
+2. Use the canonical wrapper, not ad-hoc parsing:
+   - `bash tools/analyze_copilot_debug_log/run.sh prompt-exports <file>`
+   - `bash tools/analyze_copilot_debug_log/run.sh debug-logs <file> --format markdown`
+   - `./.github/scripts/run.sh analyze_copilot_debug_log --help` when the
+     wrapper surface is unclear.
+3. Start aggregate-first: file size, prompt or token aggregates, model-call
+   counts, tool-span counts, result-size summaries, and the smallest targeted
+   slices that can prove or disprove the current hypothesis.
+4. Prefer the wrapper's bounded `markdown` or `json` output when the input fits
+   the tool contract.
+5. Avoid full JSON dumps, full prompt bodies, or full log bodies unless the
+   user explicitly asks or the exact anomaly cannot be isolated any other way.
+6. If the user explicitly asks for deeper output, name the token or context
+   impact before expanding and keep the next slice bounded to the missing
+   evidence.
+7. Preserve the evidence path in the final explanation: input file, wrapper
+   command, output format, and the exact aggregate or slice that supported the
+   conclusion.
+
+## Validation
+
+- `bash tools/analyze_copilot_debug_log/run.sh prompt-exports --help`
+- `bash tools/analyze_copilot_debug_log/run.sh debug-logs --help`
+- `./.github/scripts/run.sh analyze_copilot_debug_log --help`
diff --git a/.github/skills/local-copilot-log-analizer/agents/openai.yaml b/.github/skills/local-copilot-log-analizer/agents/openai.yaml
new file mode 100644
index 0000000..b7e10e1
--- /dev/null
+++ b/.github/skills/local-copilot-log-analizer/agents/openai.yaml
@@ -0,0 +1,4 @@
+interface:
+  display_name: "Local Copilot Log Analizer"
+  short_description: "Analyze Copilot debug logs and prompt exports with bounded, aggregate-first output"
+  default_prompt: "Use $local-copilot-log-analizer to analyze GitHub Copilot Chat debug logs or prompt exports in this repository. Route to the canonical tools/analyze_copilot_debug_log wrapper, start aggregate-first, prefer bounded markdown or json output, and avoid full dumps unless they are the missing evidence."
diff --git a/tests/test_token_budget_skill_contract.py b/tests/test_token_budget_skill_contract.py
index 21b76d0..83f5756 100644
--- a/tests/test_token_budget_skill_contract.py
+++ b/tests/test_token_budget_skill_contract.py
@@ -30,6 +30,19 @@ def test_token_budget_guardrails_are_distributed_by_owner() -> None:
     assert "Structured Data Evidence Budget" in spreadsheet_text
 
 
+def test_token_budget_gate_stays_runner_agnostic_and_non_disruptive() -> None:
+    simple_text = read_text(".github/skills/internal-gateway-simple-task/SKILL.md")
+    plan_mode_text = read_text(
+        ".github/skills/internal-gateway-simple-task/references/plan-mode.md"
+    )
+
+    assert "do not require `jq`, `awk`" in simple_text
+    assert "does not interrupt ordinary conversation" in simple_text
+    assert "token or context impact" in simple_text
+    assert "does not interrupt ordinary conversation" in plan_mode_text
+    assert "token or context impact" in plan_mode_text
+
+
 def test_structured_data_guardrails_preserve_full_file_correctness() -> None:
     spreadsheet_text = read_text(".github/skills/openai-spreadsheet/SKILL.md")
     lowered = spreadsheet_text.lower()
diff --git a/tests/test_validation_entrypoints_contract.py b/tests/test_validation_entrypoints_contract.py
index c4c6ba1..9140305 100644
--- a/tests/test_validation_entrypoints_contract.py
+++ b/tests/test_validation_entrypoints_contract.py
@@ -132,3 +132,20 @@ def test_code_analysis_workflow_smoke_tests_runner_diagnostic_clis() -> None:
     )
     assert "./.github/scripts/run.sh analyze_copilot_debug_log --help" in workflow_text
     assert "./.github/scripts/run.sh benchmark_skill_tokens --help" in workflow_text
+
+
+def test_local_copilot_log_analizer_skill_routes_canonical_wrapper() -> None:
+    skill_text = read_text(".github/skills/local-copilot-log-analizer/SKILL.md")
+    agent_text = read_text(
+        ".github/skills/local-copilot-log-analizer/agents/openai.yaml"
+    )
+    inventory_text = read_text(".github/INVENTORY.md")
+
+    assert "## Referenced skills" in skill_text
+    assert "- None." in skill_text
+    assert "tools/analyze_copilot_debug_log/run.sh" in skill_text
+    assert "./.github/scripts/run.sh analyze_copilot_debug_log --help" in skill_text
+    assert "aggregate-first" in skill_text
+    assert "avoid full json dumps" in skill_text.lower()
+    assert "$local-copilot-log-analizer" in agent_text
+    assert ".github/skills/local-copilot-log-analizer/SKILL.md" in inventory_text

From 3504e32e6d79a237fd31321b74685b83f4692953 Mon Sep 17 00:00:00 2001
From: Diego Mauricio Lagos <diego.lagosmorales@pagopa.it>
Date: Sun, 21 Jun 2026 17:14:29 +0200
Subject: [PATCH 10/11] fix: Quote venv_python expansion in
 verify_venv_version function

---
 tools/analyze_copilot_debug_log/run.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/analyze_copilot_debug_log/run.sh b/tools/analyze_copilot_debug_log/run.sh
index dc28c56..4a3cf4f 100755
--- a/tools/analyze_copilot_debug_log/run.sh
+++ b/tools/analyze_copilot_debug_log/run.sh
@@ -93,7 +93,7 @@ verify_venv_version() {
         return 1
     fi
 
-    venv_version="$($venv_python -c 'import sys; print(f"{sys.version_info.major}.{sys.version_info.minor}")')"
+    venv_version="$("$venv_python" -c 'import sys; print(f"{sys.version_info.major}.{sys.version_info.minor}")')"
     if [[ "$venv_version" == "$EXPECTED_PYTHON_MAJOR_MINOR" ]]; then
         return 0
     fi

From a847c1ab46a1c4758af3b8cf02b0ce683016c844 Mon Sep 17 00:00:00 2001
From: Diego Mauricio Lagos <diego.lagosmorales@pagopa.it>
Date: Sun, 21 Jun 2026 17:24:13 +0200
Subject: [PATCH 11/11] feat: Add local Copilot log analyzer skill and update
 related documentation; fix: Correct spelling of 'analyzer' in skill
 references and tests; feat: Enhance internal gateway review with new lens
 selection and validation guidelines

---
 .github/INVENTORY.md                          |  2 +-
 .../skills/internal-gateway-review/SKILL.md   | 22 +++++++++++++++++++
 .../SKILL.md                                  |  4 ++--
 .../agents/openai.yaml                        |  4 ++--
 tests/test_validation_entrypoints_contract.py | 10 ++++-----
 tests/test_workflow_review_contract.py        |  4 ++++
 6 files changed, 36 insertions(+), 10 deletions(-)
 rename .github/skills/{local-copilot-log-analizer => local-copilot-log-analyzer}/SKILL.md (97%)
 rename .github/skills/{local-copilot-log-analizer => local-copilot-log-analyzer}/agents/openai.yaml (77%)

diff --git a/.github/INVENTORY.md b/.github/INVENTORY.md
index 088f375..e74e1bd 100644
--- a/.github/INVENTORY.md
+++ b/.github/INVENTORY.md
@@ -123,7 +123,7 @@ This file is the exact path inventory for the live GitHub Copilot catalog in thi
 - `.github/skills/local-agent-sync-external-resources/SKILL.md`
 - `.github/skills/local-agent-sync-global-copilot-configs-into-repo/SKILL.md`
 - `.github/skills/local-agent-sync-install-ai-resources/SKILL.md`
-- `.github/skills/local-copilot-log-analizer/SKILL.md`
+- `.github/skills/local-copilot-log-analyzer/SKILL.md`
 - `.github/skills/mattpocock-caveman/SKILL.md`
 - `.github/skills/openai-docx/SKILL.md`
 - `.github/skills/openai-gh-address-comments/SKILL.md`
diff --git a/.github/skills/internal-gateway-review/SKILL.md b/.github/skills/internal-gateway-review/SKILL.md
index c6bea3b..63d60e6 100644
--- a/.github/skills/internal-gateway-review/SKILL.md
+++ b/.github/skills/internal-gateway-review/SKILL.md
@@ -9,6 +9,8 @@ description: Use when repository-owned work needs same-conversation defect-first
 
 - `internal-code-review`
 - `internal-high-level-review`
+- `internal-ai-resource-review`
+- `internal-copilot-audit`
 - `internal-gateway-critical-master`
 - `internal-gateway-writing-plans`
 - `internal-agent-support-next-step`
@@ -26,6 +28,25 @@ reopen when the check exposes a material gap.
 
 See `references/review-gate.md` for the review output contract and gate states.
 
+## Lens selection
+
+Select the review lens from the changed-path families, not from a single default.
+A diff may activate more than one lens; load each only when its evidence appears.
+
+- Code lens (`internal-code-review`): Python, Bash, Terraform, Java, or
+  Node.js/TypeScript source changes.
+- Systems lens (`internal-high-level-review`): architecture, workflow, or
+  cross-cutting impact beyond line-level defects.
+- AI-resource lens (`internal-ai-resource-review`, with `internal-copilot-audit`
+  as the drift sub-lens): repository-owned AI customization assets, including
+  `.github/skills/**`, `.github/agents/*.agent.md`, `.github/prompts/*.prompt.md`,
+  `.github/instructions/**`, `AGENTS.md`, `.github/copilot-instructions.md`,
+  `.github/INVENTORY.md`, and `**/agents/openai.yaml`.
+
+When the diff is mainly AI customization assets, the AI-resource lens is
+mandatory and the code lens stays scoped to any embedded scripts only. Do not
+let the code lens silently skip `.md` skill, agent, or instruction files.
+
 ## Token Discipline
 
 Inspect diff and failing evidence first; avoid broad repository scans unless an
@@ -59,6 +80,7 @@ gateway does not choose the execution owner.
 ## Validation
 
 - Findings stay defect-first.
+- Lens selection matches the changed-path families; AI customization assets route to `internal-ai-resource-review` (drift via `internal-copilot-audit`) instead of being skipped by the code lens.
 - Review flow preserves compact context: prioritize diff and failing evidence first, then expand only when an evidence gap remains.
 - Large evidence may be reported compactly, but each material finding still keeps severity, confidence, evidence gap, counter-validation result, and route or next owner.
 - Review output carries findings, severity, confidence, evidence gap, counter-validation result, route or next owner, and a Review Gate outcome before the final verdict.
diff --git a/.github/skills/local-copilot-log-analizer/SKILL.md b/.github/skills/local-copilot-log-analyzer/SKILL.md
similarity index 97%
rename from .github/skills/local-copilot-log-analizer/SKILL.md
rename to .github/skills/local-copilot-log-analyzer/SKILL.md
index 7312a73..8f6466e 100644
--- a/.github/skills/local-copilot-log-analizer/SKILL.md
+++ b/.github/skills/local-copilot-log-analyzer/SKILL.md
@@ -1,9 +1,9 @@
 ---
-name: local-copilot-log-analizer
+name: local-copilot-log-analyzer
 description: Use when analyzing GitHub Copilot Chat debug logs or prompt exports in this repository, especially for low-token diagnosis of token usage, model calls, tool spans, or oversized results.
 ---
 
-# Local Copilot Log Analizer
+# Local Copilot Log Analyzer
 
 ## Referenced skills
 
diff --git a/.github/skills/local-copilot-log-analizer/agents/openai.yaml b/.github/skills/local-copilot-log-analyzer/agents/openai.yaml
similarity index 77%
rename from .github/skills/local-copilot-log-analizer/agents/openai.yaml
rename to .github/skills/local-copilot-log-analyzer/agents/openai.yaml
index b7e10e1..3fff04f 100644
--- a/.github/skills/local-copilot-log-analizer/agents/openai.yaml
+++ b/.github/skills/local-copilot-log-analyzer/agents/openai.yaml
@@ -1,4 +1,4 @@
 interface:
-  display_name: "Local Copilot Log Analizer"
+  display_name: "Local Copilot Log Analyzer"
   short_description: "Analyze Copilot debug logs and prompt exports with bounded, aggregate-first output"
-  default_prompt: "Use $local-copilot-log-analizer to analyze GitHub Copilot Chat debug logs or prompt exports in this repository. Route to the canonical tools/analyze_copilot_debug_log wrapper, start aggregate-first, prefer bounded markdown or json output, and avoid full dumps unless they are the missing evidence."
+  default_prompt: "Use $local-copilot-log-analyzer to analyze GitHub Copilot Chat debug logs or prompt exports in this repository. Route to the canonical tools/analyze_copilot_debug_log wrapper, start aggregate-first, prefer bounded markdown or json output, and avoid full dumps unless they are the missing evidence."
diff --git a/tests/test_validation_entrypoints_contract.py b/tests/test_validation_entrypoints_contract.py
index 9140305..8816084 100644
--- a/tests/test_validation_entrypoints_contract.py
+++ b/tests/test_validation_entrypoints_contract.py
@@ -134,10 +134,10 @@ def test_code_analysis_workflow_smoke_tests_runner_diagnostic_clis() -> None:
     assert "./.github/scripts/run.sh benchmark_skill_tokens --help" in workflow_text
 
 
-def test_local_copilot_log_analizer_skill_routes_canonical_wrapper() -> None:
-    skill_text = read_text(".github/skills/local-copilot-log-analizer/SKILL.md")
+def test_local_copilot_log_analyzer_skill_routes_canonical_wrapper() -> None:
+    skill_text = read_text(".github/skills/local-copilot-log-analyzer/SKILL.md")
     agent_text = read_text(
-        ".github/skills/local-copilot-log-analizer/agents/openai.yaml"
+        ".github/skills/local-copilot-log-analyzer/agents/openai.yaml"
     )
     inventory_text = read_text(".github/INVENTORY.md")
 
@@ -147,5 +147,5 @@ def test_local_copilot_log_analizer_skill_routes_canonical_wrapper() -> None:
     assert "./.github/scripts/run.sh analyze_copilot_debug_log --help" in skill_text
     assert "aggregate-first" in skill_text
     assert "avoid full json dumps" in skill_text.lower()
-    assert "$local-copilot-log-analizer" in agent_text
-    assert ".github/skills/local-copilot-log-analizer/SKILL.md" in inventory_text
+    assert "$local-copilot-log-analyzer" in agent_text
+    assert ".github/skills/local-copilot-log-analyzer/SKILL.md" in inventory_text
diff --git a/tests/test_workflow_review_contract.py b/tests/test_workflow_review_contract.py
index 6d36a85..783c827 100644
--- a/tests/test_workflow_review_contract.py
+++ b/tests/test_workflow_review_contract.py
@@ -52,6 +52,10 @@ def test_review_gateway_exists_and_stops_before_fixes() -> None:
     assert "defect-first review" in skill_text
     assert "does not apply fixes" in skill_text
     assert "internal-gateway-writing-plans" in skill_text
+    assert "internal-ai-resource-review" in skill_text
+    assert "internal-copilot-audit" in skill_text
+    assert "Lens selection" in skill_text
+    assert ".github/skills/**" in skill_text
     assert "internal-gateway-simple-task" in agent_text
     assert "Review Gate" in review_gate_text
     assert "severity" in review_gate_lower