Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions docs/validate_docs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,18 @@ echo "--- Checking template status markers ---"
template_count=$(grep -rl '\-\-template\-\-' --include='*.md' . 2>/dev/null | wc -l)
echo " INFO: $template_count files still have --template-- status (expected in template repo)"

# 6. Doc-accuracy guardrail (deleted-symbol scan + internal link resolution).
# Implemented as pytest tests so the same check also runs in CI (run_pytest.yml).
echo "--- Running the doc-accuracy guardrail (tests/test_doc_accuracy.py) ---"
# pytest is run from the parent directory of $SCRIPT_DIR so the tests/ path resolves.
REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
# Capture pytest's combined stdout/stderr in a variable instead of a fixed /tmp path:
# concurrent runs cannot clobber each other's output and no stale file is left behind.
# The assignment's exit status is pytest's exit status, so the `if` still branches on it.
if guardrail_output="$(cd "$REPO_ROOT" && PYTHONPATH=. python -m pytest tests/test_doc_accuracy.py -q 2>&1)"; then
    echo " OK"
else
    echo " ERROR: doc-accuracy guardrail failed:"
    # Indent the pytest output so it reads as detail under the ERROR line.
    printf '%s\n' "$guardrail_output" | sed 's/^/ /'
    errors=$((errors + 1))
fi

echo ""
if [ "$errors" -gt 0 ]; then
echo "=== FAILED: $errors issue(s) found ==="
Expand Down
86 changes: 86 additions & 0 deletions tests/test_doc_accuracy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
"""Doc-accuracy guardrail (epic #70 / S6).

The orientation docs went stale silently because nothing checked them against the code.
These tests keep the *living onboarding docs* honest:

1. they must not reference deleted symbols (the removed runtime mapper / shapefiles /
caching) as if they were current;
2. their internal relative links must resolve.

Scope note: ADRs, CICs, and the historical reports (the falsification campaign, the
cross-repo report, the ADR-011 assessment) legitimately reference superseded designs as a
*record* — they are excluded from the deleted-symbol scan but still link-checked. A single
intentional historical mention in a living doc can be whitelisted with an inline
``legacy-ok`` marker on that line.
"""

from __future__ import annotations

import re
from pathlib import Path

# Repository root: the parent of the directory that contains this test file.
_REPO = Path(__file__).resolve().parent.parent

# --- 1. deleted-symbol scan -------------------------------------------------------------

# Symbols that name code removed in ADR-011 / C-39. None should appear as *current* in a
# living onboarding doc.
# The alternatives are matched case-insensitively and as plain substrings (there are no
# word-boundary anchors), so a banned name is caught anywhere inside a line.
_BANNED = re.compile(
r"PriogridCountryMapper|mapping/README|use_disk_cache|cachetools|geopandas",
re.IGNORECASE,
)


def _living_docs() -> list[Path]:
    """Return the onboarding docs that must stay in step with the code.

    The list is built in a fixed order: the top-level ``README.md``, then every
    ``*.md`` directly inside ``docs/architecture`` (sorted), then every ``README.md``
    found anywhere under ``views_postprocessing`` (sorted). Paths that do not exist
    on disk are dropped.
    """
    architecture_dir = _REPO / "docs" / "architecture"
    package_dir = _REPO / "views_postprocessing"
    candidates = [
        _REPO / "README.md",
        *sorted(architecture_dir.glob("*.md")),
        *sorted(package_dir.rglob("README.md")),
    ]
    return [path for path in candidates if path.exists()]


def test_living_docs_have_no_deleted_symbol_references():
    """Fail if any living onboarding doc mentions a deleted symbol.

    Every line of every doc returned by ``_living_docs()`` is searched with
    ``_BANNED``. A line containing the literal marker ``legacy-ok`` is skipped, which
    lets a doc keep one intentional historical mention. Each hit is reported as
    ``<path relative to repo>:<1-based line number>: <stripped line>``.
    """
    offenders = []
    for doc in _living_docs():
        # Decode explicitly as UTF-8: without it the result depends on the machine's
        # locale, so the same doc could pass on one platform and fail on another.
        lines = doc.read_text(encoding="utf-8").splitlines()
        for i, line in enumerate(lines, start=1):
            if "legacy-ok" in line:  # explicit opt-out for an intentional historical mention
                continue
            if _BANNED.search(line):
                offenders.append(f"{doc.relative_to(_REPO)}:{i}: {line.strip()}")
    assert not offenders, "deleted-symbol references in living docs:\n" + "\n".join(offenders)


# --- 2. internal link resolution --------------------------------------------------------

# [text](target) — capture the target; external/anchor links are filtered out by the
# code that consumes the matches. The capture is the raw text between "](" and the next
# ")", so it can carry surrounding whitespace or an optional quoted link title.
_LINK = re.compile(r"\]\(([^)]+)\)")


def _link_checked_docs() -> list[Path]:
    """Return every markdown doc whose internal links are expected to resolve.

    Candidates are gathered in order: the top-level ``README.md``, all ``*.md`` files
    anywhere under ``docs`` (sorted), and all ``README.md`` files anywhere under
    ``views_postprocessing`` (sorted). Missing paths are dropped and duplicates are
    removed, keeping the first occurrence of each path.
    """
    candidates = [
        _REPO / "README.md",
        *sorted((_REPO / "docs").rglob("*.md")),
        *sorted((_REPO / "views_postprocessing").rglob("README.md")),
    ]
    existing = (path for path in candidates if path.exists())
    # dict keys keep insertion order, so this de-duplicates without reordering.
    return list(dict.fromkeys(existing))


# Optional CommonMark link title that may follow the destination inside the parentheses:
# [text](dest "title") or [text](dest 'title').
_LINK_TITLE = re.compile(r"""\s+(?:"[^"]*"|'[^']*')$""")

# Targets with these prefixes are external or same-page links; no file to look for.
_NON_FILE_PREFIXES = ("http://", "https://", "mailto:", "#")


def _link_destination(target: str) -> str:
    """Return the destination part of a raw markdown link target.

    Handles the two CommonMark forms that are not a bare path: an angle-bracket
    destination (``<path with spaces>``, optionally followed by a title) and a bare
    destination followed by a quoted title. Any other target is returned stripped but
    otherwise unchanged.
    """
    target = target.strip()
    if target.startswith("<") and ">" in target:
        return target[1 : target.index(">")]
    return _LINK_TITLE.sub("", target)


def test_internal_doc_links_resolve():
    """Fail if any relative link in the checked docs points at a missing path.

    For every doc from ``_link_checked_docs()``, each ``](target)`` match is reduced
    to its destination; ``http(s)``, ``mailto:`` and pure ``#anchor`` destinations are
    skipped, any ``#fragment`` is removed, and the remaining path is resolved against
    the directory of the doc that contains the link. Dead links are reported as
    ``<doc path relative to repo> -> <target as written>``.
    """
    dead = []
    for doc in _link_checked_docs():
        # Decode explicitly as UTF-8 so the result does not depend on the locale.
        text = doc.read_text(encoding="utf-8")
        for target in _LINK.findall(text):
            destination = _link_destination(target)
            if destination.startswith(_NON_FILE_PREFIXES):
                continue
            path_part = destination.split("#", 1)[0]  # strip any anchor fragment
            if not path_part:
                continue
            if not (doc.parent / path_part).exists():
                dead.append(f"{doc.relative_to(_REPO)} -> {target.strip()}")
    assert not dead, "dead internal doc links:\n" + "\n".join(dead)
Loading