From 38b30d59e8b9e4c477b8fe2a82c7f8c48a2c8195 Mon Sep 17 00:00:00 2001 From: Polichinl Date: Sat, 27 Jun 2026 19:32:49 +0200 Subject: [PATCH] =?UTF-8?q?test(docs):=20S6=20=E2=80=94=20doc-accuracy=20g?= =?UTF-8?q?uardrail=20(#76)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds tests/test_doc_accuracy.py — the guard that keeps this refresh from silently re-staling (the root cause the epic fixes). Two checks: - living onboarding docs (README, docs/architecture/*, module READMEs) must not reference deleted symbols (PriogridCountryMapper / mapping/README / use_disk_cache / cachetools / geopandas) as current; an inline `legacy-ok` marker whitelists an intentional historical mention. - internal relative doc links (README + docs/** + module READMEs) must resolve. ADRs, CICs, and the historical reports are excluded from the symbol scan (they reference superseded designs as a record) but are still link-checked. Implemented as pytest tests so the same check runs in CI via the existing run_pytest workflow; docs/validate_docs.sh also invokes it for local runs. Proven to fail on a reintroduced deleted symbol and on a broken link, then revert to green. ruff clean; 128 passed (the +2 guardrail tests), 43 xfailed. Co-Authored-By: Claude Opus 4.8 --- docs/validate_docs.sh | 12 ++++++ tests/test_doc_accuracy.py | 86 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 98 insertions(+) create mode 100644 tests/test_doc_accuracy.py diff --git a/docs/validate_docs.sh b/docs/validate_docs.sh index 8bd3b9b..efc78d5 100755 --- a/docs/validate_docs.sh +++ b/docs/validate_docs.sh @@ -84,6 +84,18 @@ echo "--- Checking template status markers ---" template_count=$(grep -rl '\-\-template\-\-' --include='*.md' . 2>/dev/null | wc -l) echo " INFO: $template_count files still have --template-- status (expected in template repo)" +# 6. Doc-accuracy guardrail (deleted-symbol scan + internal link resolution). 
+# Implemented as pytest tests so the same check also runs in CI (run_pytest.yml). +echo "--- Running the doc-accuracy guardrail (tests/test_doc_accuracy.py) ---" +REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" +if ( cd "$REPO_ROOT" && PYTHONPATH=. python -m pytest tests/test_doc_accuracy.py -q >/tmp/doc_guardrail.out 2>&1 ); then + echo " OK" +else + echo " ERROR: doc-accuracy guardrail failed:" + sed 's/^/ /' /tmp/doc_guardrail.out + errors=$((errors + 1)) +fi + echo "" if [ "$errors" -gt 0 ]; then echo "=== FAILED: $errors issue(s) found ===" diff --git a/tests/test_doc_accuracy.py b/tests/test_doc_accuracy.py new file mode 100644 index 0000000..58bcc01 --- /dev/null +++ b/tests/test_doc_accuracy.py @@ -0,0 +1,86 @@ +"""Doc-accuracy guardrail (epic #70 / S6). + +The orientation docs went stale silently because nothing checked them against the code. +These tests keep the *living onboarding docs* honest: + +1. they must not reference deleted symbols (the removed runtime mapper / shapefiles / + caching) as if they were current; +2. their internal relative links must resolve. + +Scope note: ADRs, CICs, and the historical reports (the falsification campaign, the +cross-repo report, the ADR-011 assessment) legitimately reference superseded designs as a +*record* — they are excluded from the deleted-symbol scan but still link-checked. A single +intentional historical mention in a living doc can be whitelisted with an inline +``legacy-ok`` marker on that line. +""" + +from __future__ import annotations + +import re +from pathlib import Path + +_REPO = Path(__file__).resolve().parent.parent + +# --- 1. deleted-symbol scan ------------------------------------------------------------- + +# Symbols that name code removed in ADR-011 / C-39. None should appear as *current* in a +# living onboarding doc. 
_BANNED = re.compile(
    r"PriogridCountryMapper|mapping/README|use_disk_cache|cachetools|geopandas",
    re.IGNORECASE,
)


def _living_docs() -> list[Path]:
    """Return the onboarding docs that must stay current.

    The surface is the top-level ``README.md``, every ``*.md`` directly under
    ``docs/architecture`` and every ``README.md`` below ``views_postprocessing``.
    Paths that do not exist on disk are dropped.
    """
    docs = [_REPO / "README.md"]
    docs += sorted((_REPO / "docs" / "architecture").glob("*.md"))
    docs += sorted((_REPO / "views_postprocessing").rglob("README.md"))
    return [d for d in docs if d.exists()]


def test_living_docs_have_no_deleted_symbol_references():
    """Fail if a living doc names a banned symbol on a line without ``legacy-ok``.

    Each offender is reported as ``<repo-relative path>:<line number>: <line>``.
    """
    offenders = []
    for doc in _living_docs():
        # Decode explicitly as UTF-8. Without ``encoding`` the locale's preferred
        # encoding is used, so the scan could raise UnicodeDecodeError (or mis-decode)
        # on a machine whose locale is not UTF-8.
        text = doc.read_text(encoding="utf-8")
        for i, line in enumerate(text.splitlines(), start=1):
            if "legacy-ok" in line:  # explicit opt-out for an intentional historical mention
                continue
            if _BANNED.search(line):
                offenders.append(f"{doc.relative_to(_REPO)}:{i}: {line.strip()}")
    assert not offenders, "deleted-symbol references in living docs:\n" + "\n".join(offenders)


# --- 2. internal link resolution --------------------------------------------------------

# [text](target) — capture the target; we filter out external/anchor links below.
_LINK = re.compile(r"\]\(([^)]+)\)")

# Optional quoted link title that may follow the destination: [text](path "Title").
_LINK_TITLE = re.compile(r"""\s+(?:"[^"]*"|'[^']*')\s*$""")


def _link_checked_docs() -> list[Path]:
    """Return every markdown doc whose internal links should resolve.

    That is the top-level ``README.md``, every ``*.md`` below ``docs`` and every
    ``README.md`` below ``views_postprocessing``; duplicates and paths that do not
    exist are removed, first-seen order is kept.
    """
    docs = [_REPO / "README.md"]
    docs += sorted((_REPO / "docs").rglob("*.md"))
    docs += sorted((_REPO / "views_postprocessing").rglob("README.md"))
    # dict.fromkeys de-duplicates while preserving insertion order.
    return [d for d in dict.fromkeys(docs) if d.exists()]


def _link_path(target: str) -> str:
    """Reduce a raw ``(...)`` link target to the file path it points at.

    Strips surrounding whitespace, a trailing quoted link title, enclosing angle
    brackets and any ``#fragment``. Returns ``""`` when there is nothing to check on
    disk: external ``http(s)://`` / ``mailto:`` links and pure ``#anchor`` links.
    """
    destination = _LINK_TITLE.sub("", target.strip())
    if destination.startswith("<") and destination.endswith(">"):
        # Angle-bracket form, used for destinations that contain spaces.
        destination = destination[1:-1]
    if destination.startswith(("http://", "https://", "mailto:", "#")):
        return ""
    return destination.split("#", 1)[0]  # strip any anchor fragment


def test_internal_doc_links_resolve():
    """Fail if any internal relative link in the checked docs points at a missing path.

    Each dead link is reported as ``<repo-relative doc> -> <target as written>``.
    """
    dead = []
    for doc in _link_checked_docs():
        # Explicit UTF-8 so the result does not depend on the machine's locale.
        for target in _LINK.findall(doc.read_text(encoding="utf-8")):
            path_part = _link_path(target)
            if not path_part:
                continue
            if path_part.startswith("/"):
                # Root-relative link: resolve against the repository root. Joining it
                # onto doc.parent would discard doc.parent and hit the filesystem root.
                resolved = _REPO / path_part.lstrip("/")
            else:
                resolved = doc.parent / path_part
            if not resolved.exists():
                dead.append(f"{doc.relative_to(_REPO)} -> {target.strip()}")
    assert not dead, "dead internal doc links:\n" + "\n".join(dead)