From 270ab0efac70af3fb8e1490b92ac7228a9f10dfe Mon Sep 17 00:00:00 2001 From: Sutu Sebastian Date: Mon, 4 May 2026 11:00:03 +0300 Subject: [PATCH 01/10] =?UTF-8?q?feat(coverage):=20add=20`coverage`=20tabl?= =?UTF-8?q?e=20+=20SCHEMA=5FVERSION=205=E2=86=926=20(Tracer=201)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit First tracer of the static coverage ingestion plan (docs/plans/coverage-ingestion.md). Pure schema change, no engine yet. What lands: - New `coverage` table with natural-key PK (file_path, name, line_start) per D6. Deliberately NOT a FK to symbols.id: dropAll() drops symbols on every --full reindex and the recreated rows get fresh AUTOINCREMENT ids; CASCADE would wipe coverage on every full rebuild. Natural key sidesteps the entire FK/CASCADE hazard. Orphan cleanup (file deleted from project) lives at the end of every ingest in tracer 2a — exercised by the new test using a raw DELETE so the contract is locked in before the engine code lands. - `idx_coverage_file_name` mirrors the typical join shape (the killer recipe joins symbols ↔ coverage on file_path/name/line_start) and the (file_path, name) prefix also covers the GROUP BY file_path scan used by the bundled files-by-coverage recipe (D2 + D13). - `coverage` table is intentionally absent from `dropAll()` (joins the query_baselines precedent), so user rows survive --full and SCHEMA_VERSION-mismatch rebuilds. - SCHEMA_VERSION bumped 5→6. Existing installs auto-rebuild on next `codemap` run via the schema-mismatch path in createSchema(); the new table is empty until first `codemap ingest-coverage` invocation (tracer 3), which is the correct initial state per D12. - New db.test.ts case exercises the full contract: round-trip of a partial-coverage row set including the total_statements=0 → NULL edge (D5), survival across dropAll() + createTables(), and the orphan- cleanup DELETE that ships in tracer 2a's engine. 
--- src/db.test.ts | 100 +++++++++++++++++++++++++++++++++++++++++++++++++ src/db.ts | 27 ++++++++++++- 2 files changed, 126 insertions(+), 1 deletion(-) diff --git a/src/db.test.ts b/src/db.test.ts index a45e643..ad15a31 100644 --- a/src/db.test.ts +++ b/src/db.test.ts @@ -188,6 +188,106 @@ describe("SQLite layer (in-memory)", () => { } }); + it("coverage table round-trips, survives dropAll(), and orphan sweep removes deleted-file rows", () => { + const db = openCodemapDatabase(":memory:"); + try { + createTables(db); + createIndexes(db); + + // Two files; the second will be deleted to exercise the orphan-cleanup + // DELETE that lives in application/coverage-engine.ts (D6). + for (const path of ["a.ts", "b.ts"]) { + insertFile(db, { + path, + content_hash: `h-${path}`, + size: 1, + line_count: 1, + language: "ts", + last_modified: 0, + indexed_at: 0, + }); + } + + // Natural-key insert (no FK to symbols.id; D6). + db.run( + `INSERT INTO coverage (file_path, name, line_start, coverage_pct, hit_statements, total_statements) + VALUES (?, ?, ?, ?, ?, ?), (?, ?, ?, ?, ?, ?), (?, ?, ?, ?, ?, ?)`, + [ + "a.ts", + "fnA", + 1, + 100.0, + 3, + 3, + "a.ts", + "fnB", + 5, + 50.0, + 1, + 2, + "b.ts", + "fnC", + 1, + null, + 0, + 0, // total = 0 → coverage_pct NULL (D5 edge) + ], + ); + + const rows = db + .query( + "SELECT file_path, name, coverage_pct FROM coverage ORDER BY file_path, line_start", + ) + .all() as Array<{ + file_path: string; + name: string; + coverage_pct: number | null; + }>; + expect(rows).toEqual([ + { file_path: "a.ts", name: "fnA", coverage_pct: 100.0 }, + { file_path: "a.ts", name: "fnB", coverage_pct: 50.0 }, + { file_path: "b.ts", name: "fnC", coverage_pct: null }, + ]); + + // dropAll() drops symbols + indices + every CASCADE-bearing table; the + // headline contract for coverage (D6) is that user rows survive that path. 
+ dropAll(db); + createTables(db); + createIndexes(db); + expect( + (db.query("SELECT COUNT(*) AS n FROM coverage").get() as { n: number }) + .n, + ).toBe(3); + + // After --full reindex, files would normally be re-inserted by the + // indexer. Simulate "b.ts was deleted from the project before re-index" + // by re-inserting only a.ts. + insertFile(db, { + path: "a.ts", + content_hash: "h-a.ts", + size: 1, + line_count: 1, + language: "ts", + last_modified: 0, + indexed_at: 0, + }); + + // Orphan-cleanup DELETE — lives in application/coverage-engine.ts at the + // end of every ingest. Exercised here so a future schema refactor can't + // silently break the contract that motivated D6 (no FK / CASCADE). + db.run( + "DELETE FROM coverage WHERE file_path NOT IN (SELECT path FROM files)", + ); + + const remaining = db + .query("SELECT file_path FROM coverage ORDER BY file_path, line_start") + .all() as Array<{ file_path: string }>; + expect(remaining.map((r) => r.file_path)).toEqual(["a.ts", "a.ts"]); + } finally { + closeDb(db); + } + }); + it("query_baselines survives dropAll() — the schema-rebuild contract", () => { const db = openCodemapDatabase(":memory:"); try { diff --git a/src/db.ts b/src/db.ts index c4480aa..31258ca 100644 --- a/src/db.ts +++ b/src/db.ts @@ -2,7 +2,7 @@ import { openCodemapDatabase } from "./sqlite-db"; import type { CodemapDatabase, BindValues } from "./sqlite-db"; /** Bump on any DDL change; `createSchema()` auto-rebuilds on mismatch. */ -export const SCHEMA_VERSION = 5; +export const SCHEMA_VERSION = 6; export type { CodemapDatabase }; @@ -154,6 +154,26 @@ export function createTables(db: CodemapDatabase) { git_ref TEXT, created_at INTEGER NOT NULL ) STRICT; + + -- User-data table: static coverage snapshots ingested via codemap ingest-coverage + -- (Istanbul coverage-final.json + LCOV lcov.info, written by every modern test + -- runner). 
Joins to symbols on the natural key (file_path, name, line_start) — + -- intentionally NOT a FK to symbols.id, because dropAll() drops symbols on every + -- --full reindex and the recreated rows get fresh AUTOINCREMENT ids. Natural-key + -- rows survive that churn. Like query_baselines, intentionally excluded from + -- dropAll() so a --full rebuild doesn't nuke the user's last ingest. Orphan + -- cleanup (file deleted from project) lives at the end of every ingest in + -- application/coverage-engine.ts, not here. See docs/plans/coverage-ingestion.md + -- (D6) for the unwind on why CASCADE was rejected. + CREATE TABLE IF NOT EXISTS coverage ( + file_path TEXT NOT NULL, + name TEXT NOT NULL, + line_start INTEGER NOT NULL, + coverage_pct REAL, + hit_statements INTEGER NOT NULL, + total_statements INTEGER NOT NULL, + PRIMARY KEY (file_path, name, line_start) + ) STRICT, WITHOUT ROWID; `); } @@ -201,6 +221,11 @@ export function createIndexes(db: CodemapDatabase) { CREATE INDEX IF NOT EXISTS idx_calls_scope ON calls(caller_scope, file_path, callee_name); CREATE INDEX IF NOT EXISTS idx_calls_callee ON calls(callee_name, file_path); CREATE INDEX IF NOT EXISTS idx_calls_file ON calls(file_path); + + -- Mirrors the typical join shape symbols.{file_path,name,line_start}; + -- the (file_path, name) prefix also covers GROUP BY file_path scans + -- used by the bundled files-by-coverage recipe (D2 + D13). + CREATE INDEX IF NOT EXISTS idx_coverage_file_name ON coverage(file_path, name); `); } From 6a342423d84f329d7920a605985a1ccde8877573 Mon Sep 17 00:00:00 2001 From: Sutu Sebastian Date: Mon, 4 May 2026 11:07:07 +0300 Subject: [PATCH 02/10] feat(coverage): shared upsert core + Istanbul parser (Tracer 2a) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pure engine + format-agnostic write path. No CLI / FS side effects beyond db.run on the caller-supplied SQLite handle. 
`upsertCoverageRows({db, projectRoot, rows, format, sourcePath})` — shared core consumed by both Istanbul (this commit) and LCOV (next): - Normalises absolute paths via toProjectRelative (D8); files outside the project root land in skipped.unmatched_files. - Loads symbols per-file once and projects each statement onto the innermost enclosing symbol via JS-side (line_end - line_start) ASC tie-break (D7) — avoids the per-statement SQL round-trip the plan flagged as the hot path. - Aggregates per (file_path, name, line_start) bucket; total = 0 → coverage_pct NULL (D5 edge — "untested" ≠ "no testable code"). - Single transaction: per-file DELETE + bulk INSERT for idempotent re-ingest, then orphan-cleanup DELETE (D6 — no FK / CASCADE means deleted-file rows accumulate without this sweep), then writes the three coverage_last_ingested_* meta keys. `ingestIstanbul({db, projectRoot, payload, sourcePath})` — Istanbul parser front-end: - Subset-typed `IstanbulPayload` reads only statementMap + s; ignores fnMap / branchMap / inputSourceMap so the format can grow without churning the signature. - Inner `path` field takes precedence over the absolute-path key (handles webpack-style symlinked paths). - Tolerates malformed file entries (missing statementMap or s) — skips silently rather than throwing, so one corrupt file doesn't poison the whole ingest. 10 unit tests cover both pieces: - Shared core: innermost-wins aggregation, statement-outside-symbol observability, zero-statement-symbol → no row, UPSERT idempotence, orphan cleanup after file deletion, project-root normalisation, unmatched-file skipping, meta-key writes. - Istanbul parser: real-shape payload end-to-end, malformed-entry tolerance. 
--- src/application/coverage-engine.test.ts | 398 ++++++++++++++++++++++++ src/application/coverage-engine.ts | 282 +++++++++++++++++ 2 files changed, 680 insertions(+) create mode 100644 src/application/coverage-engine.test.ts create mode 100644 src/application/coverage-engine.ts diff --git a/src/application/coverage-engine.test.ts b/src/application/coverage-engine.test.ts new file mode 100644 index 0000000..389d5b3 --- /dev/null +++ b/src/application/coverage-engine.test.ts @@ -0,0 +1,398 @@ +import { describe, expect, it } from "bun:test"; + +import { + closeDb, + createIndexes, + createTables, + insertFile, + insertSymbols, +} from "../db"; +import { openCodemapDatabase } from "../sqlite-db"; +import { ingestIstanbul, upsertCoverageRows } from "./coverage-engine"; +import type { IstanbulPayload } from "./coverage-engine"; + +const PROJECT_ROOT = "/repo"; + +function setupDb() { + const db = openCodemapDatabase(":memory:"); + createTables(db); + createIndexes(db); + return db; +} + +function indexedFile(path: string) { + return { + path, + content_hash: `h-${path}`, + size: 1, + line_count: 100, + language: "ts" as const, + last_modified: 0, + indexed_at: 0, + }; +} + +function fnSym( + file_path: string, + name: string, + line_start: number, + line_end: number, +) { + return { + file_path, + name, + kind: "function", + line_start, + line_end, + signature: `${name}(): void`, + is_exported: 1, + is_default_export: 0, + members: null, + doc_comment: null, + value: null, + parent_name: null, + visibility: null, + }; +} + +describe("coverage-engine", () => { + describe("upsertCoverageRows (shared core)", () => { + it("aggregates statements per innermost symbol and computes pct", () => { + const db = setupDb(); + try { + insertFile(db, indexedFile("a.ts")); + insertSymbols(db, [ + fnSym("a.ts", "outer", 1, 20), + fnSym("a.ts", "inner", 5, 10), + ]); + const result = upsertCoverageRows({ + db, + projectRoot: PROJECT_ROOT, + format: "istanbul", + sourcePath: 
"/repo/coverage/coverage-final.json", + rows: [ + { file_path: "a.ts", line: 2, hit_count: 1 }, // outer only + { file_path: "a.ts", line: 6, hit_count: 1 }, // inner (innermost) + { file_path: "a.ts", line: 7, hit_count: 0 }, // inner, miss + { file_path: "a.ts", line: 15, hit_count: 1 }, // outer (after inner range) + ], + }); + expect(result.ingested).toEqual({ symbols: 2, files: 1 }); + expect(result.skipped.statements_no_symbol).toBe(0); + + const rows = db + .query( + "SELECT name, hit_statements, total_statements, coverage_pct FROM coverage ORDER BY name", + ) + .all() as Array<{ + name: string; + hit_statements: number; + total_statements: number; + coverage_pct: number | null; + }>; + // inner: 1 hit / 2 stmts = 50%; outer: 2 hits / 2 stmts = 100% + // (outer's range covers lines 5-10 too, but innermost-wins gave them to inner). + expect(rows).toEqual([ + { + name: "inner", + hit_statements: 1, + total_statements: 2, + coverage_pct: 50, + }, + { + name: "outer", + hit_statements: 2, + total_statements: 2, + coverage_pct: 100, + }, + ]); + } finally { + closeDb(db); + } + }); + + it("statement outside every symbol increments skipped.statements_no_symbol", () => { + const db = setupDb(); + try { + insertFile(db, indexedFile("a.ts")); + insertSymbols(db, [fnSym("a.ts", "fn", 10, 20)]); + const result = upsertCoverageRows({ + db, + projectRoot: PROJECT_ROOT, + format: "istanbul", + sourcePath: "/repo/coverage-final.json", + rows: [ + { file_path: "a.ts", line: 1, hit_count: 1 }, // top-level expr + { file_path: "a.ts", line: 2, hit_count: 0 }, // side-effect import + { file_path: "a.ts", line: 15, hit_count: 1 }, // inside fn + ], + }); + expect(result.skipped.statements_no_symbol).toBe(2); + expect(result.ingested.symbols).toBe(1); + } finally { + closeDb(db); + } + }); + + it("symbol with zero statements gets no row (NULL via LEFT JOIN)", () => { + const db = setupDb(); + try { + insertFile(db, indexedFile("a.ts")); + insertSymbols(db, [ + fnSym("a.ts", 
"ifaceLike", 1, 5), // no statements project here + fnSym("a.ts", "fn", 10, 20), + ]); + upsertCoverageRows({ + db, + projectRoot: PROJECT_ROOT, + format: "istanbul", + sourcePath: "/x", + rows: [{ file_path: "a.ts", line: 12, hit_count: 1 }], + }); + const names = ( + db.query("SELECT name FROM coverage ORDER BY name").all() as Array<{ + name: string; + }> + ).map((r) => r.name); + expect(names).toEqual(["fn"]); + } finally { + closeDb(db); + } + }); + + it("re-ingest replaces per-file rows (UPSERT idempotence)", () => { + const db = setupDb(); + try { + insertFile(db, indexedFile("a.ts")); + insertSymbols(db, [fnSym("a.ts", "fn", 1, 10)]); + const opts = { + db, + projectRoot: PROJECT_ROOT, + format: "istanbul" as const, + sourcePath: "/x", + }; + upsertCoverageRows({ + ...opts, + rows: [{ file_path: "a.ts", line: 2, hit_count: 0 }], + }); + upsertCoverageRows({ + ...opts, + rows: [{ file_path: "a.ts", line: 2, hit_count: 5 }], + }); + const row = db + .query("SELECT hit_statements, coverage_pct FROM coverage") + .get() as { hit_statements: number; coverage_pct: number }; + expect(row).toEqual({ hit_statements: 1, coverage_pct: 100 }); + } finally { + closeDb(db); + } + }); + + it("orphan cleanup drops rows whose file no longer exists", () => { + const db = setupDb(); + try { + insertFile(db, indexedFile("a.ts")); + insertFile(db, indexedFile("b.ts")); + insertSymbols(db, [ + fnSym("a.ts", "fnA", 1, 5), + fnSym("b.ts", "fnB", 1, 5), + ]); + upsertCoverageRows({ + db, + projectRoot: PROJECT_ROOT, + format: "istanbul", + sourcePath: "/x", + rows: [ + { file_path: "a.ts", line: 2, hit_count: 1 }, + { file_path: "b.ts", line: 2, hit_count: 1 }, + ], + }); + // Simulate "b.ts deleted between ingests" by removing the files row. 
+ db.run("DELETE FROM files WHERE path = ?", ["b.ts"]); + const result = upsertCoverageRows({ + db, + projectRoot: PROJECT_ROOT, + format: "istanbul", + sourcePath: "/x", + rows: [{ file_path: "a.ts", line: 2, hit_count: 1 }], + }); + expect(result.pruned_orphans).toBe(1); + const paths = ( + db.query("SELECT file_path FROM coverage").all() as Array<{ + file_path: string; + }> + ).map((r) => r.file_path); + expect(paths).toEqual(["a.ts"]); + } finally { + closeDb(db); + } + }); + + it("file outside project root → skipped.unmatched_files", () => { + const db = setupDb(); + try { + insertFile(db, indexedFile("a.ts")); + insertSymbols(db, [fnSym("a.ts", "fn", 1, 5)]); + const result = upsertCoverageRows({ + db, + projectRoot: PROJECT_ROOT, + format: "istanbul", + sourcePath: "/x", + rows: [ + { file_path: "/elsewhere/x.ts", line: 1, hit_count: 1 }, + { file_path: "a.ts", line: 2, hit_count: 1 }, + ], + }); + expect(result.skipped.unmatched_files).toBe(1); + expect(result.ingested.files).toBe(1); + } finally { + closeDb(db); + } + }); + + it("writes the three coverage_last_ingested_* meta keys", () => { + const db = setupDb(); + try { + insertFile(db, indexedFile("a.ts")); + insertSymbols(db, [fnSym("a.ts", "fn", 1, 5)]); + upsertCoverageRows({ + db, + projectRoot: PROJECT_ROOT, + format: "lcov", + sourcePath: "/repo/coverage/lcov.info", + rows: [{ file_path: "a.ts", line: 2, hit_count: 1 }], + }); + const meta = db + .query<{ key: string; value: string }>( + "SELECT key, value FROM meta WHERE key LIKE 'coverage_last_%' ORDER BY key", + ) + .all() as Array<{ key: string; value: string }>; + expect(meta.map((m) => m.key)).toEqual([ + "coverage_last_ingested_at", + "coverage_last_ingested_format", + "coverage_last_ingested_path", + ]); + const map = Object.fromEntries(meta.map((m) => [m.key, m.value])); + expect(map.coverage_last_ingested_format).toBe("lcov"); + expect(map.coverage_last_ingested_path).toBe( + "/repo/coverage/lcov.info", + ); + 
expect(Number(map.coverage_last_ingested_at)).toBeGreaterThan(0); + } finally { + closeDb(db); + } + }); + + it("normalises absolute paths to project-relative", () => { + const db = setupDb(); + try { + insertFile(db, indexedFile("src/api/client.ts")); + insertSymbols(db, [fnSym("src/api/client.ts", "fn", 1, 5)]); + upsertCoverageRows({ + db, + projectRoot: PROJECT_ROOT, + format: "istanbul", + sourcePath: "/x", + rows: [ + { file_path: "/repo/src/api/client.ts", line: 2, hit_count: 1 }, + ], + }); + const path = ( + db.query("SELECT file_path FROM coverage").get() as { + file_path: string; + } + ).file_path; + expect(path).toBe("src/api/client.ts"); + } finally { + closeDb(db); + } + }); + }); + + describe("ingestIstanbul (parser)", () => { + it("parses a real-shape Istanbul payload end-to-end", () => { + const db = setupDb(); + try { + insertFile(db, indexedFile("src/lib/cache.ts")); + insertSymbols(db, [ + fnSym("src/lib/cache.ts", "get", 9, 15), + fnSym("src/lib/cache.ts", "invalidate", 17, 23), + ]); + const payload: IstanbulPayload = { + "/repo/src/lib/cache.ts": { + path: "/repo/src/lib/cache.ts", + statementMap: { + "0": { + start: { line: 10, column: 0 }, + end: { line: 10, column: 1 }, + }, + "1": { + start: { line: 11, column: 0 }, + end: { line: 11, column: 1 }, + }, + "2": { + start: { line: 18, column: 0 }, + end: { line: 18, column: 1 }, + }, + }, + s: { "0": 5, "1": 0, "2": 1 }, + }, + }; + const result = ingestIstanbul({ + db, + projectRoot: PROJECT_ROOT, + payload, + sourcePath: "/repo/coverage/coverage-final.json", + }); + expect(result).toMatchObject({ + ingested: { symbols: 2, files: 1 }, + format: "istanbul", + }); + const rows = db + .query( + "SELECT name, hit_statements, total_statements FROM coverage ORDER BY name", + ) + .all() as Array<{ + name: string; + hit_statements: number; + total_statements: number; + }>; + expect(rows).toEqual([ + { name: "get", hit_statements: 1, total_statements: 2 }, + { name: "invalidate", hit_statements: 1, 
total_statements: 1 }, + ]); + } finally { + closeDb(db); + } + }); + + it("tolerates malformed file entries (missing statementMap or s)", () => { + const db = setupDb(); + try { + insertFile(db, indexedFile("a.ts")); + insertSymbols(db, [fnSym("a.ts", "fn", 1, 5)]); + const result = ingestIstanbul({ + db, + projectRoot: PROJECT_ROOT, + sourcePath: "/x", + payload: { + "/repo/a.ts": { + statementMap: { + "0": { + start: { line: 2, column: 0 }, + end: { line: 2, column: 1 }, + }, + }, + s: { "0": 1 }, + }, + // eslint-disable-next-line @typescript-eslint/no-explicit-any + "/repo/broken.ts": { path: "/repo/broken.ts" } as any, + }, + }); + expect(result.ingested.symbols).toBe(1); + } finally { + closeDb(db); + } + }); + }); +}); diff --git a/src/application/coverage-engine.ts b/src/application/coverage-engine.ts new file mode 100644 index 0000000..38c2118 --- /dev/null +++ b/src/application/coverage-engine.ts @@ -0,0 +1,282 @@ +import type { CodemapDatabase } from "../db"; +import { toProjectRelative } from "./validate-engine"; + +/** + * One coverage data point — a single executable statement after the parser + * front-end (Istanbul or LCOV) has normalised its source format. The engine + * is format-agnostic from this point on. + * + * `line` is the source-text line where the statement begins (1-indexed, + * matches `symbols.line_start`). `hit_count` is the number of times the + * statement was executed; `0` means uncovered. + */ +export interface CoverageRow { + file_path: string; + line: number; + hit_count: number; +} + +/** Source format detected by the CLI auto-detector. 
*/ +export type CoverageFormat = "istanbul" | "lcov"; + +export interface IngestResult { + ingested: { symbols: number; files: number }; + skipped: { unmatched_files: number; statements_no_symbol: number }; + pruned_orphans: number; + format: CoverageFormat; +} + +interface UpsertOpts { + db: CodemapDatabase; + projectRoot: string; + rows: CoverageRow[]; + format: CoverageFormat; + /** Absolute path of the source artifact, recorded in `meta` for freshness checks. */ + sourcePath: string; +} + +/** + * Format-agnostic write path. Maps every {@link CoverageRow} to the innermost + * enclosing symbol via the natural-key projection (D7), aggregates per + * `(file_path, name, line_start)`, upserts into `coverage`, writes the three + * `coverage_last_ingested_*` meta keys, and runs the orphan-cleanup DELETE. + * + * Pure with respect to filesystem and process state — every side effect is a + * `db.run` against the in-memory or on-disk SQLite handle the caller passed. + */ +export function upsertCoverageRows(opts: UpsertOpts): IngestResult { + const { db, projectRoot, rows, format, sourcePath } = opts; + + // Normalise paths once up-front. Istanbul writes absolute paths; + // toProjectRelative reuses the same projection validate-engine ships (D8). + // Drop rows whose normalised path escapes the project root (relative path + // would start with `..`) — they get tracked in `skipped.unmatched_files`. + const filesSeen = new Set(); + const filesUnmatched = new Set(); + const normalised: CoverageRow[] = []; + for (const row of rows) { + const rel = toProjectRelative(projectRoot, row.file_path); + if (rel.startsWith("..")) { + filesUnmatched.add(row.file_path); + continue; + } + normalised.push({ ...row, file_path: rel }); + filesSeen.add(rel); + } + + // Inner aggregator: (file_path, name, line_start) → {hit, total}. + // Using a string key keeps the aggregation O(1) per row without spilling + // to a nested Map. 
+ interface SymbolBucket { + file_path: string; + name: string; + line_start: number; + hit_statements: number; + total_statements: number; + } + const buckets = new Map(); + + // Per-file projection cache: symbols of one file are looked up once and + // walked in JS for the innermost-wins selection. Avoids a per-statement + // SQL round-trip (the hot path called out in the plan's perf notes) and + // also lets us implement the tie-break locally. + interface SymbolRange { + name: string; + line_start: number; + line_end: number; + /** `line_end - line_start`; primary sort key for innermost-wins (D7). */ + span: number; + } + const symbolsByFile = new Map(); + + function loadSymbols(file_path: string): SymbolRange[] { + let cached = symbolsByFile.get(file_path); + if (cached) return cached; + cached = ( + db + .query<{ name: string; line_start: number; line_end: number }>( + `SELECT name, line_start, line_end FROM symbols WHERE file_path = ? ORDER BY line_start ASC`, + ) + .all(file_path) ?? 
[] + ).map((r) => ({ + name: r.name, + line_start: r.line_start, + line_end: r.line_end, + span: r.line_end - r.line_start, + })); + symbolsByFile.set(file_path, cached); + return cached; + } + + let statementsNoSymbol = 0; + for (const row of normalised) { + const symbols = loadSymbols(row.file_path); + let best: SymbolRange | undefined; + for (const sym of symbols) { + if (sym.line_start > row.line) break; // ORDER BY line_start ASC: nothing further can enclose + if (sym.line_end < row.line) continue; + if (!best || sym.span < best.span) best = sym; + } + if (!best) { + statementsNoSymbol++; + continue; + } + const key = `${row.file_path}\u0000${best.name}\u0000${best.line_start}`; + let bucket = buckets.get(key); + if (!bucket) { + bucket = { + file_path: row.file_path, + name: best.name, + line_start: best.line_start, + hit_statements: 0, + total_statements: 0, + }; + buckets.set(key, bucket); + } + bucket.total_statements++; + if (row.hit_count > 0) bucket.hit_statements++; + } + + // Single transaction: clear every existing coverage row for the files + // we're ingesting (a re-ingest is a full replace per file, not a merge), + // then bulk-insert the new aggregates. Idempotent across re-runs. + let pruned = 0; + db.run("BEGIN"); + try { + for (const file_path of filesSeen) { + db.run("DELETE FROM coverage WHERE file_path = ?", [file_path]); + } + for (const bucket of buckets.values()) { + // total = 0 → coverage_pct NULL (D5 edge); "untested" and "no testable + // code" are different signals — never collapse to 0. + const pct = + bucket.total_statements > 0 + ? 
(bucket.hit_statements / bucket.total_statements) * 100 + : null; + db.run( + `INSERT INTO coverage + (file_path, name, line_start, coverage_pct, hit_statements, total_statements) + VALUES (?, ?, ?, ?, ?, ?)`, + [ + bucket.file_path, + bucket.name, + bucket.line_start, + pct, + bucket.hit_statements, + bucket.total_statements, + ], + ); + } + + // Orphan cleanup (D6) — files that no longer exist in the project drop + // their coverage rows. Lives at the end of every ingest so the + // natural-key trade-off (no FK / CASCADE) doesn't accumulate dead rows. + const beforeOrphans = ( + db.query<{ n: number }>("SELECT COUNT(*) AS n FROM coverage").get() as { + n: number; + } + ).n; + db.run( + "DELETE FROM coverage WHERE file_path NOT IN (SELECT path FROM files)", + ); + const afterOrphans = ( + db.query<{ n: number }>("SELECT COUNT(*) AS n FROM coverage").get() as { + n: number; + } + ).n; + pruned = beforeOrphans - afterOrphans; + + // Meta keys (single ingest at a time, so per-row `source` would be + // denormalisation noise — D plan). + db.run("INSERT OR REPLACE INTO meta (key, value) VALUES (?, ?)", [ + "coverage_last_ingested_at", + String(Date.now()), + ]); + db.run("INSERT OR REPLACE INTO meta (key, value) VALUES (?, ?)", [ + "coverage_last_ingested_path", + sourcePath, + ]); + db.run("INSERT OR REPLACE INTO meta (key, value) VALUES (?, ?)", [ + "coverage_last_ingested_format", + format, + ]); + + db.run("COMMIT"); + } catch (err) { + db.run("ROLLBACK"); + throw err; + } + + return { + ingested: { symbols: buckets.size, files: filesSeen.size }, + skipped: { + unmatched_files: filesUnmatched.size, + statements_no_symbol: statementsNoSymbol, + }, + pruned_orphans: pruned, + format, + }; +} + +/* ------------------------------------------------------------------ */ +/* Istanbul JSON parser */ +/* ------------------------------------------------------------------ */ + +/** + * Subset of Istanbul's `coverage-final.json` shape we read. 
Everything we + * don't need (fnMap / branchMap / inputSourceMap / hash) is left untyped + * so the file format can grow without churning this signature. + * + * Statement counts (`s`) are keyed by the same string indices as + * `statementMap`; each value is the times-executed count for that statement. + */ +export interface IstanbulFileCoverage { + path?: string; + statementMap: Record; + s: Record; +} + +interface IstanbulLocation { + start: { line: number; column: number }; + end: { line: number; column: number }; +} + +export type IstanbulPayload = Record; + +interface ParserOpts { + db: CodemapDatabase; + projectRoot: string; + payload: IstanbulPayload; + /** Absolute path the CLI read the JSON from; threaded into `meta`. */ + sourcePath: string; +} + +/** + * Parse an Istanbul payload and dispatch to {@link upsertCoverageRows}. The + * Istanbul shape is keyed by absolute file path; the inner `path` field + * (when present) takes precedence over the key (handles webpack-style + * symlinked paths). + */ +export function ingestIstanbul(opts: ParserOpts): IngestResult { + const { payload, sourcePath, ...rest } = opts; + const rows: CoverageRow[] = []; + for (const [absPath, file] of Object.entries(payload)) { + if (!file?.statementMap || !file?.s) continue; // tolerate malformed entries + const file_path = file.path ?? 
absPath; + for (const [stmtId, location] of Object.entries(file.statementMap)) { + const hit = file.s[stmtId]; + if (hit === undefined) continue; + rows.push({ + file_path, + line: location.start.line, + hit_count: hit, + }); + } + } + return upsertCoverageRows({ + ...rest, + rows, + format: "istanbul", + sourcePath, + }); +} From 6c679811f810e5ace7a2747b3e0c1238149f1a04 Mon Sep 17 00:00:00 2001 From: Sutu Sebastian Date: Mon, 4 May 2026 11:10:29 +0300 Subject: [PATCH 03/10] feat(coverage): LCOV parser front-end (Tracer 2b) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes the second format axis from the plan — LCOV (`lcov.info`) ingester that shares the entire write path with the Istanbul ingester from Tracer 2a. `ingestLcov({db, projectRoot, payload, sourcePath})` is a pure regex tokenizer over the LCOV record format. Recognised lines: - `SF:PATH` — start of a file record (sets the "current file") - `DA:LINE,HITS[,CHECKSUM]` — one statement per record - `end_of_record` — closes the current file record - Comments (`# …`), blank lines, and CRLF endings tolerated Ignored lines (everything else: `TN:`, `FN:`, `FNDA:`, `FNF:`, `FNH:`, `BRDA:`, `BRF:`, `BRH:`, `LF:`, `LH:`) — D5 scope is statement coverage only; the unused records are skipped without churning the parser. Throws on `DA:` outside an `SF:` block (malformed LCOV — the file would have nowhere to attach to). Missing `end_of_record` is tolerated by construction (next `SF:` resets the current file). Five unit tests cover both the LCOV parser in isolation and the cross-format equivalence contract: - `ingestLcov`: well-formed LCOV with multiple SF records → identical-shape coverage rows; CRLF + comments + blank-line tolerance; DA-outside-SF throws; optional `DA:` checksum field parsed.
- Cross-format: identical Istanbul + LCOV inputs produce byte-identical rows in the `coverage` table — the contract that lets Tracer 4 ship one set of golden snapshots that asserts both formats land on the same answer. 15 tests total in coverage-engine.test.ts (10 existing + 5 LCOV). Zero new write-side code — the LCOV parser is pure parse-and-normalise into the same `CoverageRow[]` the Istanbul parser produces, then both hand off to `upsertCoverageRows`. --- src/application/coverage-engine.test.ts | 194 +++++++++++++++++++++++- src/application/coverage-engine.ts | 71 +++++++++ 2 files changed, 264 insertions(+), 1 deletion(-) diff --git a/src/application/coverage-engine.test.ts b/src/application/coverage-engine.test.ts index 389d5b3..7908dc5 100644 --- a/src/application/coverage-engine.test.ts +++ b/src/application/coverage-engine.test.ts @@ -8,7 +8,11 @@ import { insertSymbols, } from "../db"; import { openCodemapDatabase } from "../sqlite-db"; -import { ingestIstanbul, upsertCoverageRows } from "./coverage-engine"; +import { + ingestIstanbul, + ingestLcov, + upsertCoverageRows, +} from "./coverage-engine"; import type { IstanbulPayload } from "./coverage-engine"; const PROJECT_ROOT = "/repo"; @@ -366,6 +370,61 @@ describe("coverage-engine", () => { } }); + it("Istanbul + LCOV produce identical rows for equivalent inputs (cross-format equivalence)", () => { + const istanbulDb = setupDb(); + const lcovDb = setupDb(); + try { + for (const db of [istanbulDb, lcovDb]) { + insertFile(db, indexedFile("src/lib/cache.ts")); + insertSymbols(db, [fnSym("src/lib/cache.ts", "get", 9, 15)]); + } + ingestIstanbul({ + db: istanbulDb, + projectRoot: PROJECT_ROOT, + sourcePath: "/repo/coverage-final.json", + payload: { + "/repo/src/lib/cache.ts": { + path: "/repo/src/lib/cache.ts", + statementMap: { + "0": { + start: { line: 10, column: 0 }, + end: { line: 10, column: 1 }, + }, + "1": { + start: { line: 11, column: 0 }, + end: { line: 11, column: 1 }, + }, + }, + s: { "0": 5, 
"1": 0 }, + }, + }, + }); + ingestLcov({ + db: lcovDb, + projectRoot: PROJECT_ROOT, + sourcePath: "/repo/lcov.info", + payload: [ + "TN:", + "SF:/repo/src/lib/cache.ts", + "DA:10,5", + "DA:11,0", + "end_of_record", + "", + ].join("\n"), + }); + const cols = + "file_path, name, line_start, hit_statements, total_statements, coverage_pct"; + const istanbulRows = istanbulDb + .query(`SELECT ${cols} FROM coverage`) + .all(); + const lcovRows = lcovDb.query(`SELECT ${cols} FROM coverage`).all(); + expect(lcovRows).toEqual(istanbulRows); + } finally { + closeDb(istanbulDb); + closeDb(lcovDb); + } + }); + it("tolerates malformed file entries (missing statementMap or s)", () => { const db = setupDb(); try { @@ -395,4 +454,137 @@ describe("coverage-engine", () => { } }); }); + + describe("ingestLcov (parser)", () => { + function lcovDb() { + const db = setupDb(); + insertFile(db, indexedFile("src/api/client.ts")); + insertSymbols(db, [ + fnSym("src/api/client.ts", "fetchUser", 5, 12), + fnSym("src/api/client.ts", "fetchPosts", 14, 20), + ]); + return db; + } + + it("parses well-formed LCOV with multiple SF records", () => { + const db = lcovDb(); + try { + const lcov = [ + "TN:", + "SF:/repo/src/api/client.ts", + "FN:5,fetchUser", + "FN:14,fetchPosts", + "DA:6,3", + "DA:7,3", + "DA:15,0", + "DA:16,0", + "LF:4", + "LH:2", + "end_of_record", + "", + ].join("\n"); + const result = ingestLcov({ + db, + projectRoot: PROJECT_ROOT, + sourcePath: "/repo/lcov.info", + payload: lcov, + }); + expect(result.format).toBe("lcov"); + expect(result.ingested).toEqual({ symbols: 2, files: 1 }); + const rows = db + .query( + "SELECT name, hit_statements, total_statements, coverage_pct FROM coverage ORDER BY name", + ) + .all() as Array<{ + name: string; + hit_statements: number; + total_statements: number; + coverage_pct: number; + }>; + expect(rows).toEqual([ + { + name: "fetchPosts", + hit_statements: 0, + total_statements: 2, + coverage_pct: 0, + }, + { + name: "fetchUser", + 
hit_statements: 2, + total_statements: 2, + coverage_pct: 100, + }, + ]); + } finally { + closeDb(db); + } + }); + + it("ignores TN/FN/FNDA/BRDA/LF/LH and supports CRLF + comments + blank lines", () => { + const db = lcovDb(); + try { + const lcov = [ + "# header comment", + "TN:test-suite", + "", + "SF:/repo/src/api/client.ts", + "FN:5,fetchUser", + "FNDA:1,fetchUser", + "FNF:1", + "FNH:1", + "DA:6,1", + "BRDA:6,0,0,1", + "BRF:1", + "BRH:1", + "LF:1", + "LH:1", + "end_of_record", + ].join("\r\n"); + const result = ingestLcov({ + db, + projectRoot: PROJECT_ROOT, + sourcePath: "/x", + payload: lcov, + }); + expect(result.ingested.symbols).toBe(1); + } finally { + closeDb(db); + } + }); + + it("DA: outside SF: block throws (malformed)", () => { + const db = lcovDb(); + try { + expect(() => + ingestLcov({ + db, + projectRoot: PROJECT_ROOT, + sourcePath: "/x", + payload: "DA:1,1\n", + }), + ).toThrow(/DA: record outside SF: block/); + } finally { + closeDb(db); + } + }); + + it("DA: with optional checksum (third comma-field) is parsed", () => { + const db = lcovDb(); + try { + const result = ingestLcov({ + db, + projectRoot: PROJECT_ROOT, + sourcePath: "/x", + payload: [ + "SF:/repo/src/api/client.ts", + "DA:6,3,abc1234checksum", + "end_of_record", + ].join("\n"), + }); + expect(result.ingested.symbols).toBe(1); + } finally { + closeDb(db); + } + }); + }); }); diff --git a/src/application/coverage-engine.ts b/src/application/coverage-engine.ts index 38c2118..b0d89ca 100644 --- a/src/application/coverage-engine.ts +++ b/src/application/coverage-engine.ts @@ -280,3 +280,74 @@ export function ingestIstanbul(opts: ParserOpts): IngestResult { sourcePath, }); } + +/* ------------------------------------------------------------------ */ +/* LCOV parser */ +/* ------------------------------------------------------------------ */ + +interface LcovParserOpts { + db: CodemapDatabase; + projectRoot: string; + /** Raw LCOV text (read by the CLI from `lcov.info`). 
*/ + payload: string; + sourcePath: string; +} + +/** + * Parse an LCOV record stream and dispatch to {@link upsertCoverageRows}. + * + * Recognised lines (everything else — `TN:` / `FN:` / `FNDA:` / `FNF:` / + * `FNH:` / `BRDA:` / `BRF:` / `BRH:` / `LF:` / `LH:` — is ignored; we only + * need statement coverage in v1 per D5): + * - `SF:` — start of a file record; sets the "current file" + * - `DA:,[,]` — one statement per record + * - `end_of_record` — closes the current file record + * + * Throws when a `DA:` line appears outside an `SF:` block (malformed + * LCOV — the file would have nowhere to attach to). Missing + * `end_of_record` is tolerated (the last block flushes implicitly when + * the next `SF:` arrives or the input ends). + */ +export function ingestLcov(opts: LcovParserOpts): IngestResult { + const { payload, sourcePath, ...rest } = opts; + const rows: CoverageRow[] = []; + let currentFile: string | undefined; + let lineNumber = 0; + for (const rawLine of payload.split(/\r?\n/)) { + lineNumber++; + const line = rawLine.trim(); + if (line === "" || line.startsWith("#")) continue; + if (line.startsWith("SF:")) { + currentFile = line.slice(3); + continue; + } + if (line === "end_of_record") { + currentFile = undefined; + continue; + } + if (line.startsWith("DA:")) { + if (!currentFile) { + throw new Error( + `LCOV parse error at line ${lineNumber}: DA: record outside SF: block`, + ); + } + // DA:,[,] + const parts = line.slice(3).split(","); + const lineNum = Number.parseInt(parts[0] ?? "", 10); + const hitCount = Number.parseInt(parts[1] ?? "", 10); + if (!Number.isFinite(lineNum) || !Number.isFinite(hitCount)) continue; + rows.push({ + file_path: currentFile, + line: lineNum, + hit_count: hitCount, + }); + } + // Everything else (TN:, FN:, BRDA:, etc.) silently skipped per D5. 
+ } + return upsertCoverageRows({ + ...rest, + rows, + format: "lcov", + sourcePath, + }); +} From ac4f659fa8e0fd1a637037c3f1fea427e5a3a7d8 Mon Sep 17 00:00:00 2001 From: Sutu Sebastian Date: Mon, 4 May 2026 11:14:02 +0300 Subject: [PATCH 04/10] feat(coverage): codemap ingest-coverage CLI verb (Tracer 3) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wires the engine (Tracer 2a/2b) into the CLI surface end-to-end. Smoke- tested against a real Istanbul payload pointed at src/db.ts: ingest writes 2 symbols (openDb 100%, closeDb 0%); query --json reads them back with the expected coverage_pct shape. cli/cmd-ingest-coverage.ts: - Single positional arg + --json. No --source flag (per plan: format is auto-detectable from extension; flag noise dropped). - resolveArtifact() probes the path: .json → istanbul, .info → lcov, directory → looks for both filenames, errors if both or neither present (no precedence guessing — explicit is better than implicit). - File reads use the canonical Node-vs-Bun split: Bun.file().json() / .text() on Bun (native parser perf for multi-MB Istanbul payloads), readFile + JSON.parse on Node. Mirrors config.ts. See packaging.md. - Plain-text terminal output by default; --json emits the IngestResult envelope. Errors emit {"error":"…"} on stdout under --json, plain message on stderr otherwise. Sets process.exitCode (no process.exit) so piped stdout isn't truncated. cli/main.ts: dispatch new verb between snippet and query (lazy import matches every other cmd). cli/bootstrap.ts: validateIndexModeArgs() recognises ingest-coverage as a known subcommand; printCliUsage() lists it in a new "Coverage ingest" section. 7 parser tests cover all paths: requires the subcommand position, help on --help/-h, single-path default, --json flag, missing-path error, unknown-option error, multiple-paths error. 
Engine + CLI smoke verified locally: $ bun src/index.ts ingest-coverage /tmp/cov.json --json {"ingested":{"symbols":2,"files":1},"skipped":...,"format":"istanbul"} --- src/cli/bootstrap.ts | 4 + src/cli/cmd-ingest-coverage.test.ts | 63 ++++++++ src/cli/cmd-ingest-coverage.ts | 227 ++++++++++++++++++++++++++++ src/cli/main.ts | 25 +++ 4 files changed, 319 insertions(+) create mode 100644 src/cli/cmd-ingest-coverage.test.ts create mode 100644 src/cli/cmd-ingest-coverage.ts diff --git a/src/cli/bootstrap.ts b/src/cli/bootstrap.ts index 664d5a9..6ccb0e6 100644 --- a/src/cli/bootstrap.ts +++ b/src/cli/bootstrap.ts @@ -42,6 +42,9 @@ Targeted reads (precise lookup by symbol name): Impact analysis (graph walk for refactor blast-radius): codemap impact [--direction up|down|both] [--depth N] [--via ] [--limit N] [--summary] [--json] +Coverage ingest (Istanbul JSON or LCOV from any test runner): + codemap ingest-coverage [--json] # path = file or dir; format auto-detected + Other: codemap version codemap --version, -V @@ -77,6 +80,7 @@ export function validateIndexModeArgs(rest: string[]): void { if (rest[0] === "show") return; if (rest[0] === "snippet") return; if (rest[0] === "impact") return; + if (rest[0] === "ingest-coverage") return; if (rest[0] === "agents") { if (rest[1] === "init") return; diff --git a/src/cli/cmd-ingest-coverage.test.ts b/src/cli/cmd-ingest-coverage.test.ts new file mode 100644 index 0000000..898f249 --- /dev/null +++ b/src/cli/cmd-ingest-coverage.test.ts @@ -0,0 +1,63 @@ +import { describe, expect, it } from "bun:test"; + +import { parseIngestCoverageRest } from "./cmd-ingest-coverage"; + +describe("parseIngestCoverageRest", () => { + it("requires the ingest-coverage subcommand position", () => { + expect(() => parseIngestCoverageRest(["query"])).toThrow(); + }); + + it("returns help on --help / -h", () => { + expect(parseIngestCoverageRest(["ingest-coverage", "--help"])).toEqual({ + kind: "help", + }); + 
expect(parseIngestCoverageRest(["ingest-coverage", "-h"])).toEqual({ + kind: "help", + }); + }); + + it("parses a single path with default --json=false", () => { + expect( + parseIngestCoverageRest([ + "ingest-coverage", + "coverage/coverage-final.json", + ]), + ).toEqual({ + kind: "run", + path: "coverage/coverage-final.json", + json: false, + }); + }); + + it("parses --json", () => { + expect( + parseIngestCoverageRest(["ingest-coverage", "coverage", "--json"]), + ).toEqual({ kind: "run", path: "coverage", json: true }); + }); + + it("rejects missing path", () => { + const r = parseIngestCoverageRest(["ingest-coverage"]); + expect(r.kind).toBe("error"); + }); + + it("rejects unknown options", () => { + const r = parseIngestCoverageRest([ + "ingest-coverage", + "x", + "--source", + "lcov", + ]); + expect(r.kind).toBe("error"); + if (r.kind === "error") { + expect(r.message).toMatch(/unknown option.*--source/); + } + }); + + it("rejects multiple paths", () => { + const r = parseIngestCoverageRest(["ingest-coverage", "a.json", "b.json"]); + expect(r.kind).toBe("error"); + if (r.kind === "error") { + expect(r.message).toMatch(/unexpected extra argument/); + } + }); +}); diff --git a/src/cli/cmd-ingest-coverage.ts b/src/cli/cmd-ingest-coverage.ts new file mode 100644 index 0000000..1e7c068 --- /dev/null +++ b/src/cli/cmd-ingest-coverage.ts @@ -0,0 +1,227 @@ +import { existsSync, statSync } from "node:fs"; +import { readFile } from "node:fs/promises"; +import { isAbsolute, join, resolve } from "node:path"; + +import { ingestIstanbul, ingestLcov } from "../application/coverage-engine"; +import type { + CoverageFormat, + IngestResult, + IstanbulPayload, +} from "../application/coverage-engine"; +import { closeDb, openDb } from "../db"; +import { bootstrapCodemap } from "./bootstrap-codemap"; + +interface IngestCoverageOpts { + root: string; + configFile: string | undefined; + stateDir?: string | undefined; + /** Resolved absolute path to coverage-final.json, lcov.info, 
or a directory. */ + path: string; + json: boolean; +} + +const ISTANBUL_FILENAME = "coverage-final.json"; +const LCOV_FILENAME = "lcov.info"; + +export function printIngestCoverageCmdHelp(): void { + console.log(`Usage: codemap ingest-coverage [--json] + +Ingest a static coverage artifact into the index so structural queries +can compose coverage filters in pure SQL. No test runner is invoked — +codemap reads what \`bun test\`, \`vitest\`, \`jest\`, \`c8\`, \`nyc\` +already produce. + +Args: + Path to one of: + - coverage-final.json (Istanbul) + - lcov.info (LCOV; e.g. \`bun test --coverage\`) + - a directory containing exactly one of the above + +Format auto-detected from filename / extension. Errors if a directory +holds both \`coverage-final.json\` and \`lcov.info\` (no precedence guess). + +Flags: + --json Emit the result envelope on stdout. Default: human text. + --help, -h Show this help. + +Output (JSON): + { "format": "istanbul"|"lcov", + "ingested": { "symbols": N, "files": M }, + "skipped": { "unmatched_files": K, "statements_no_symbol": S }, + "pruned_orphans": O } + +Examples: + codemap ingest-coverage coverage/coverage-final.json + codemap ingest-coverage coverage/lcov.info + codemap ingest-coverage coverage --json +`); +} + +export function parseIngestCoverageRest( + rest: string[], +): + | { kind: "help" } + | { kind: "error"; message: string } + | { kind: "run"; path: string; json: boolean } { + if (rest[0] !== "ingest-coverage") { + throw new Error("parseIngestCoverageRest: expected ingest-coverage"); + } + let path: string | undefined; + let json = false; + for (let i = 1; i < rest.length; i++) { + const a = rest[i]!; + if (a === "--help" || a === "-h") return { kind: "help" }; + if (a === "--json") { + json = true; + continue; + } + if (a.startsWith("-")) { + return { + kind: "error", + message: `codemap ingest-coverage: unknown option "${a}". 
Run \`codemap ingest-coverage --help\` for usage.`, + }; + } + if (path !== undefined) { + return { + kind: "error", + message: `codemap ingest-coverage: unexpected extra argument "${a}". Pass exactly one path.`, + }; + } + path = a; + } + if (path === undefined) { + return { + kind: "error", + message: `codemap ingest-coverage: missing . Run \`codemap ingest-coverage --help\` for usage.`, + }; + } + return { kind: "run", path, json }; +} + +/** + * Resolve the user-supplied path to a concrete (artifact, format) pair. + * Directory inputs probe for `coverage-final.json` and `lcov.info`; + * presence of both is an explicit error per the plan ("no precedence + * guessing — explicit is better than implicit"). + */ +function resolveArtifact( + inputPath: string, + cwd: string, +): { format: CoverageFormat; absPath: string } { + const abs = isAbsolute(inputPath) ? inputPath : resolve(cwd, inputPath); + if (!existsSync(abs)) { + throw new Error(`codemap ingest-coverage: path not found: ${abs}`); + } + const stat = statSync(abs); + if (stat.isDirectory()) { + const istanbul = join(abs, ISTANBUL_FILENAME); + const lcov = join(abs, LCOV_FILENAME); + const hasIstanbul = existsSync(istanbul); + const hasLcov = existsSync(lcov); + if (hasIstanbul && hasLcov) { + throw new Error( + `codemap ingest-coverage: directory ${abs} contains both ${ISTANBUL_FILENAME} and ${LCOV_FILENAME}. Pass the file path explicitly.`, + ); + } + if (hasIstanbul) return { format: "istanbul", absPath: istanbul }; + if (hasLcov) return { format: "lcov", absPath: lcov }; + throw new Error( + `codemap ingest-coverage: directory ${abs} contains neither ${ISTANBUL_FILENAME} nor ${LCOV_FILENAME}.`, + ); + } + if (abs.endsWith(".json")) return { format: "istanbul", absPath: abs }; + if (abs.endsWith(".info")) return { format: "lcov", absPath: abs }; + throw new Error( + `codemap ingest-coverage: cannot auto-detect format from "${abs}". 
Expected a .json (Istanbul) or .info (LCOV) file, or a directory containing one.`, + ); +} + +/** + * Read a JSON file via the canonical Node-vs-Bun split — Bun.file().json() + * uses Bun's native parser (materially faster on multi-MB Istanbul payloads); + * Node falls through to readFile + JSON.parse. Mirrors `config.ts`. + * See docs/packaging.md § Node vs Bun. + */ +async function readJsonFile(filePath: string): Promise { + if (typeof Bun !== "undefined") { + return Bun.file(filePath).json(); + } + const text = await readFile(filePath, "utf-8"); + return JSON.parse(text) as unknown; +} + +async function readTextFile(filePath: string): Promise { + if (typeof Bun !== "undefined") { + return Bun.file(filePath).text(); + } + return readFile(filePath, "utf-8"); +} + +export async function runIngestCoverageCmd( + opts: IngestCoverageOpts, +): Promise { + try { + await bootstrapCodemap(opts); + const { format, absPath } = resolveArtifact(opts.path, opts.root); + + let result: IngestResult; + const db = openDb(); + try { + if (format === "istanbul") { + const payload = (await readJsonFile(absPath)) as IstanbulPayload; + result = ingestIstanbul({ + db, + projectRoot: opts.root, + payload, + sourcePath: absPath, + }); + } else { + const payload = await readTextFile(absPath); + result = ingestLcov({ + db, + projectRoot: opts.root, + payload, + sourcePath: absPath, + }); + } + } finally { + closeDb(db); + } + + if (opts.json) { + console.log(JSON.stringify(result)); + return; + } + renderTerminal(result, absPath); + } catch (err) { + const msg = err instanceof Error ? 
err.message : String(err); + if (opts.json) { + console.log(JSON.stringify({ error: msg })); + } else { + console.error(msg); + } + process.exitCode = 1; + } +} + +function renderTerminal(result: IngestResult, sourcePath: string): void { + console.log(`# ingest-coverage format=${result.format} source=${sourcePath}`); + console.log( + ` ingested: ${result.ingested.symbols} symbols / ${result.ingested.files} files`, + ); + if (result.skipped.unmatched_files > 0) { + console.log( + ` skipped: ${result.skipped.unmatched_files} unmatched file(s) outside project root`, + ); + } + if (result.skipped.statements_no_symbol > 0) { + console.log( + ` skipped: ${result.skipped.statements_no_symbol} statement(s) outside any symbol range`, + ); + } + if (result.pruned_orphans > 0) { + console.log( + ` pruned: ${result.pruned_orphans} orphan row(s) for files no longer in the index`, + ); + } +} diff --git a/src/cli/main.ts b/src/cli/main.ts index 8730ea0..84840f0 100644 --- a/src/cli/main.ts +++ b/src/cli/main.ts @@ -280,6 +280,31 @@ Copies bundled agent templates into .agents/ under the project root. 
return; } + if (rest[0] === "ingest-coverage") { + const { + parseIngestCoverageRest, + printIngestCoverageCmdHelp, + runIngestCoverageCmd, + } = await import("./cmd-ingest-coverage.js"); + const parsed = parseIngestCoverageRest(rest); + if (parsed.kind === "help") { + printIngestCoverageCmdHelp(); + return; + } + if (parsed.kind === "error") { + console.error(parsed.message); + process.exit(1); + } + await runIngestCoverageCmd({ + root, + configFile, + stateDir, + path: parsed.path, + json: parsed.json, + }); + return; + } + if (rest[0] === "query") { const { parseQueryRest, From 7c5771a1aef925c672ca7ada364295de7fab1828 Mon Sep 17 00:00:00 2001 From: Sutu Sebastian Date: Mon, 4 May 2026 11:22:00 +0300 Subject: [PATCH 05/10] feat(coverage): bundled recipes + fixture coverage data + goldens (Tracer 4) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes the agent-surface contract from D13: every common coverage question gets a `--recipe` verb. Three v1 recipes ship under templates/recipes/, auto-discovered by the existing recipes-loader (`bun src/index.ts query --recipes-json` lists 15 recipes total — was 12). Bundled recipes: - `untested-and-dead.{sql,md}` — the killer recipe. Exported functions with no callers AND zero coverage. Recipe MD documents the v1 name-collision lossiness (D11) and three concrete narrowing patterns agents can apply: scope by file_path prefix, exclude default exports (Next.js entry points), restrict to public visibility. - `files-by-coverage.{sql,md}` — files ranked ascending by statement coverage. Replaces the deferred `file_coverage` rollup table (D2): GROUP BY on the symbol-level table is index-bounded by `idx_coverage_file_name`. NULL coverage_pct (zero-statement files) sorted last to avoid drowning out actual zero-coverage files. 
- `worst-covered-exports.{sql,md}` — top-20 worst-covered exported functions (LIMIT in the SQL, not configurable in v1 — config-driven LIMIT is deferred until a real consumer asks). Each recipe ships a frontmatter `actions` block (per PR #26) so agents get a per-row follow-up hint in `--json` output. Fixture data: - fixtures/minimal/coverage/coverage-final.json (Istanbul) and fixtures/minimal/coverage/lcov.info (LCOV) — equivalent partial coverage shape covering 10 fixture symbols across 6 files. Both formats use project-relative paths so they work without sed. Golden runner extension (scripts/query-golden.ts + schema.ts): - New `setup` array in scenarios.json runs one-time setup steps after `cm.index()` and before scenarios. Today: `{kind: "ingest-coverage", path: "..."}` — engine auto-detects format by extension. - Schema is backward-compatible: parser accepts either the legacy flat array OR the new `{setup?, scenarios}` object via z.union. - Setup dispatch reuses the engine functions directly (ingestIstanbul, ingestLcov) — same write path the CLI verb uses. 4 new golden snapshots prove the surface end-to-end on the fixture corpus: - coverage-rows-after-ingest.json: raw `coverage` table contents - untested-and-dead.json: 6 dead+untested symbols (incl. the @deprecated legacyClient; epochMs, also @deprecated, is not in this snapshot) - files-by-coverage.json: 6 files ranked from 0% (api/client.ts) to 100% (ProductCard, usePermissions) - worst-covered-exports.json: top exported functions with coverage_pct - (LCOV cross-format equivalence proven by engine unit test, not duplicated in goldens — single setup step per scenarios.json keeps the runner simple) fixtures/minimal/README.md: new "What's exercised" row + Use section with `bun src/index.ts ingest-coverage` worked examples. All 23 golden scenarios pass. All 15 engine unit tests still pass.
--- .../minimal/coverage-rows-after-ingest.json | 72 ++++++ .../golden/minimal/files-by-coverage.json | 38 +++ fixtures/golden/minimal/files-hashes.json | 4 +- .../golden/minimal/untested-and-dead.json | 38 +++ .../golden/minimal/worst-covered-exports.json | 122 ++++++++++ fixtures/golden/scenarios.json | 219 ++++++++++-------- fixtures/minimal/README.md | 9 + fixtures/minimal/coverage/coverage-final.json | 51 ++++ fixtures/minimal/coverage/lcov.info | 43 ++++ scripts/query-golden.ts | 51 +++- scripts/query-golden/schema.ts | 35 ++- templates/recipes/files-by-coverage.md | 14 ++ templates/recipes/files-by-coverage.sql | 12 + templates/recipes/untested-and-dead.md | 21 ++ templates/recipes/untested-and-dead.sql | 16 ++ templates/recipes/worst-covered-exports.md | 14 ++ templates/recipes/worst-covered-exports.sql | 14 ++ 17 files changed, 669 insertions(+), 104 deletions(-) create mode 100644 fixtures/golden/minimal/coverage-rows-after-ingest.json create mode 100644 fixtures/golden/minimal/files-by-coverage.json create mode 100644 fixtures/golden/minimal/untested-and-dead.json create mode 100644 fixtures/golden/minimal/worst-covered-exports.json create mode 100644 fixtures/minimal/coverage/coverage-final.json create mode 100644 fixtures/minimal/coverage/lcov.info create mode 100644 templates/recipes/files-by-coverage.md create mode 100644 templates/recipes/files-by-coverage.sql create mode 100644 templates/recipes/untested-and-dead.md create mode 100644 templates/recipes/untested-and-dead.sql create mode 100644 templates/recipes/worst-covered-exports.md create mode 100644 templates/recipes/worst-covered-exports.sql diff --git a/fixtures/golden/minimal/coverage-rows-after-ingest.json b/fixtures/golden/minimal/coverage-rows-after-ingest.json new file mode 100644 index 0000000..abd5c0e --- /dev/null +++ b/fixtures/golden/minimal/coverage-rows-after-ingest.json @@ -0,0 +1,72 @@ +[ + { + "file_path": "src/api/client.ts", + "name": "legacyClient", + "hit_statements": 0, + 
"total_statements": 1, + "coverage_pct": 0 + }, + { + "file_path": "src/components/shop/ProductCard.tsx", + "name": "ProductCard", + "hit_statements": 3, + "total_statements": 3, + "coverage_pct": 100 + }, + { + "file_path": "src/components/shop/ShopButton.tsx", + "name": "FormatPrice", + "hit_statements": 0, + "total_statements": 1, + "coverage_pct": 0 + }, + { + "file_path": "src/components/shop/ShopButton.tsx", + "name": "ShopButton", + "hit_statements": 2, + "total_statements": 2, + "coverage_pct": 100 + }, + { + "file_path": "src/usePermissions.ts", + "name": "usePermissions", + "hit_statements": 1, + "total_statements": 1, + "coverage_pct": 100 + }, + { + "file_path": "src/utils/date.ts", + "name": "now", + "hit_statements": 1, + "total_statements": 1, + "coverage_pct": 100 + }, + { + "file_path": "src/utils/date.ts", + "name": "_epochSeconds", + "hit_statements": 0, + "total_statements": 1, + "coverage_pct": 0 + }, + { + "file_path": "src/utils/date.ts", + "name": "nanoseconds", + "hit_statements": 0, + "total_statements": 1, + "coverage_pct": 0 + }, + { + "file_path": "src/utils/format.ts", + "name": "epochMs", + "hit_statements": 0, + "total_statements": 1, + "coverage_pct": 0 + }, + { + "file_path": "src/utils/format.ts", + "name": "nowIso", + "hit_statements": 1, + "total_statements": 1, + "coverage_pct": 100 + } +] diff --git a/fixtures/golden/minimal/files-by-coverage.json b/fixtures/golden/minimal/files-by-coverage.json new file mode 100644 index 0000000..7a24c45 --- /dev/null +++ b/fixtures/golden/minimal/files-by-coverage.json @@ -0,0 +1,38 @@ +[ + { + "file_path": "src/api/client.ts", + "hit_statements": 0, + "total_statements": 1, + "coverage_pct": 0 + }, + { + "file_path": "src/utils/date.ts", + "hit_statements": 1, + "total_statements": 3, + "coverage_pct": 33.33 + }, + { + "file_path": "src/utils/format.ts", + "hit_statements": 1, + "total_statements": 2, + "coverage_pct": 50 + }, + { + "file_path": "src/components/shop/ShopButton.tsx", + 
"hit_statements": 2, + "total_statements": 3, + "coverage_pct": 66.67 + }, + { + "file_path": "src/components/shop/ProductCard.tsx", + "hit_statements": 3, + "total_statements": 3, + "coverage_pct": 100 + }, + { + "file_path": "src/usePermissions.ts", + "hit_statements": 1, + "total_statements": 1, + "coverage_pct": 100 + } +] diff --git a/fixtures/golden/minimal/files-hashes.json b/fixtures/golden/minimal/files-hashes.json index 0f6497f..b5a8ab2 100644 --- a/fixtures/golden/minimal/files-hashes.json +++ b/fixtures/golden/minimal/files-hashes.json @@ -7,9 +7,9 @@ }, { "path": "README.md", - "content_hash": "75032b11e435e7f265151787faf72c59a986b491ba4e866856ba77eda6b4deb4", + "content_hash": "6333c6bbc9240f2df77da0b60e4c8653a20df7c116531616c1b25d6ceb1521b6", "language": "md", - "line_count": 41 + "line_count": 50 }, { "path": "package.json", diff --git a/fixtures/golden/minimal/untested-and-dead.json b/fixtures/golden/minimal/untested-and-dead.json new file mode 100644 index 0000000..e6f870a --- /dev/null +++ b/fixtures/golden/minimal/untested-and-dead.json @@ -0,0 +1,38 @@ +[ + { + "name": "legacyClient", + "file_path": "src/api/client.ts", + "line_start": 41, + "coverage_pct": 0 + }, + { + "name": "FormatPrice", + "file_path": "src/components/shop/ShopButton.tsx", + "line_start": 3, + "coverage_pct": 0 + }, + { + "name": "run", + "file_path": "src/consumer.ts", + "line_start": 10, + "coverage_pct": 0 + }, + { + "name": "_epochSeconds", + "file_path": "src/utils/date.ts", + "line_start": 12, + "coverage_pct": 0 + }, + { + "name": "nanoseconds", + "file_path": "src/utils/date.ts", + "line_start": 19, + "coverage_pct": 0 + }, + { + "name": "_hiResEpoch", + "file_path": "src/utils/date.ts", + "line_start": 26, + "coverage_pct": 0 + } +] diff --git a/fixtures/golden/minimal/worst-covered-exports.json b/fixtures/golden/minimal/worst-covered-exports.json new file mode 100644 index 0000000..541eab6 --- /dev/null +++ b/fixtures/golden/minimal/worst-covered-exports.json @@ 
-0,0 +1,122 @@ +[ + { + "name": "createClient", + "file_path": "src/api/client.ts", + "line_start": 15, + "coverage_pct": 0 + }, + { + "name": "setupTransport", + "file_path": "src/api/client.ts", + "line_start": 22, + "coverage_pct": 0 + }, + { + "name": "openSocket", + "file_path": "src/api/client.ts", + "line_start": 28, + "coverage_pct": 0 + }, + { + "name": "handshake", + "file_path": "src/api/client.ts", + "line_start": 32, + "coverage_pct": 0 + }, + { + "name": "legacyClient", + "file_path": "src/api/client.ts", + "line_start": 41, + "coverage_pct": 0 + }, + { + "name": "FormatPrice", + "file_path": "src/components/shop/ShopButton.tsx", + "line_start": 3, + "coverage_pct": 0 + }, + { + "name": "run", + "file_path": "src/consumer.ts", + "line_start": 10, + "coverage_pct": 0 + }, + { + "name": "get", + "file_path": "src/lib/cache.ts", + "line_start": 8, + "coverage_pct": 0 + }, + { + "name": "invalidate", + "file_path": "src/lib/cache.ts", + "line_start": 16, + "coverage_pct": 0 + }, + { + "name": "read", + "file_path": "src/lib/store.ts", + "line_start": 7, + "coverage_pct": 0 + }, + { + "name": "write", + "file_path": "src/lib/store.ts", + "line_start": 11, + "coverage_pct": 0 + }, + { + "name": "_epochSeconds", + "file_path": "src/utils/date.ts", + "line_start": 12, + "coverage_pct": 0 + }, + { + "name": "nanoseconds", + "file_path": "src/utils/date.ts", + "line_start": 19, + "coverage_pct": 0 + }, + { + "name": "_hiResEpoch", + "file_path": "src/utils/date.ts", + "line_start": 26, + "coverage_pct": 0 + }, + { + "name": "epochMs", + "file_path": "src/utils/format.ts", + "line_start": 5, + "coverage_pct": 0 + }, + { + "name": "ProductCard", + "file_path": "src/components/shop/ProductCard.tsx", + "line_start": 11, + "coverage_pct": 100 + }, + { + "name": "ShopButton", + "file_path": "src/components/shop/ShopButton.tsx", + "line_start": 7, + "coverage_pct": 100 + }, + { + "name": "usePermissions", + "file_path": "src/usePermissions.ts", + "line_start": 1, + 
"coverage_pct": 100 + }, + { + "name": "now", + "file_path": "src/utils/date.ts", + "line_start": 5, + "coverage_pct": 100 + }, + { + "name": "nowIso", + "file_path": "src/utils/format.ts", + "line_start": 13, + "coverage_pct": 100 + } +] diff --git a/fixtures/golden/scenarios.json b/fixtures/golden/scenarios.json index 2869429..2d9430b 100644 --- a/fixtures/golden/scenarios.json +++ b/fixtures/golden/scenarios.json @@ -1,97 +1,122 @@ -[ - { - "id": "files-count", - "prompt": "How many indexed files exist?", - "sql": "SELECT COUNT(*) AS n FROM files" - }, - { - "id": "symbol-usePermissions", - "prompt": "Where is the usePermissions symbol defined?", - "sql": "SELECT name, kind, file_path FROM symbols WHERE name = 'usePermissions'" - }, - { - "id": "index-summary", - "prompt": "Row counts for main tables (same SQL as --recipe index-summary)", - "recipe": "index-summary" - }, - { - "id": "imports-consumer-alias", - "prompt": "Which alias import does consumer.ts use for the API client?", - "sql": "SELECT file_path, source, specifiers, is_type_only FROM imports WHERE file_path = 'src/consumer.ts' AND source = '~/api/client'" - }, - { - "id": "dependencies-from-consumer", - "prompt": "What files does src/consumer.ts depend on (resolved edges)?", - "sql": "SELECT from_path, to_path FROM dependencies WHERE from_path = 'src/consumer.ts' ORDER BY to_path" - }, - { - "id": "markers-notes-todo", - "prompt": "TODO marker in fixture notes markdown", - "sql": "SELECT file_path, line_number, kind, content FROM markers WHERE file_path = 'src/notes.md'" - }, - { - "id": "exports-client", - "prompt": "What does src/api/client.ts export?", - "sql": "SELECT name, kind FROM exports WHERE file_path = 'src/api/client.ts'" - }, - { - "id": "css-variables", - "prompt": "Which CSS custom properties (design tokens) are defined?", - "sql": "SELECT name, value FROM css_variables ORDER BY name" - }, - { - "id": "css-classes-module", - "prompt": "Which CSS classes exist and are any from CSS 
modules?", - "sql": "SELECT name, file_path, is_module FROM css_classes ORDER BY name" - }, - { - "id": "css-keyframes", - "prompt": "Which @keyframes animations are defined?", - "sql": "SELECT name, file_path FROM css_keyframes" - }, - { - "id": "css-imports", - "prompt": "Which CSS files import other stylesheets?", - "sql": "SELECT file_path, source FROM imports WHERE file_path LIKE '%.css'" - }, - { - "id": "markers-all-kinds", - "prompt": "How many markers of each kind exist?", - "sql": "SELECT kind, COUNT(*) as n FROM markers GROUP BY kind ORDER BY kind" - }, - { - "id": "components-no-false-positives", - "prompt": "Which components are detected (FormatPrice should not appear)?", - "sql": "SELECT name FROM components ORDER BY name" - }, - { - "id": "type-members-client-config", - "prompt": "What fields does ClientConfig have?", - "sql": "SELECT name, type, is_optional, is_readonly FROM type_members WHERE symbol_name = 'ClientConfig' ORDER BY name" - }, - { - "id": "calls-consumer", - "prompt": "What does the run function in consumer.ts call?", - "sql": "SELECT DISTINCT callee_name FROM calls WHERE caller_name = 'run' AND file_path = 'src/consumer.ts' ORDER BY callee_name" - }, - { - "id": "deprecated-symbols", - "prompt": "Which symbols are flagged @deprecated in JSDoc?", - "recipe": "deprecated-symbols" - }, - { - "id": "visibility-tags", - "prompt": "Which symbols are tagged @internal / @private / @alpha / @beta?", - "recipe": "visibility-tags" - }, - { - "id": "files-hashes", - "prompt": "All indexed files with content_hash (recipe powers `codemap validate`)", - "recipe": "files-hashes" - }, - { - "id": "barrel-files", - "prompt": "Top files by export count (barrel candidates)", - "recipe": "barrel-files" - } -] +{ + "setup": [ + { "kind": "ingest-coverage", "path": "coverage/coverage-final.json" } + ], + "scenarios": [ + { + "id": "files-count", + "prompt": "How many indexed files exist?", + "sql": "SELECT COUNT(*) AS n FROM files" + }, + { + "id": 
"symbol-usePermissions", + "prompt": "Where is the usePermissions symbol defined?", + "sql": "SELECT name, kind, file_path FROM symbols WHERE name = 'usePermissions'" + }, + { + "id": "index-summary", + "prompt": "Row counts for main tables (same SQL as --recipe index-summary)", + "recipe": "index-summary" + }, + { + "id": "imports-consumer-alias", + "prompt": "Which alias import does consumer.ts use for the API client?", + "sql": "SELECT file_path, source, specifiers, is_type_only FROM imports WHERE file_path = 'src/consumer.ts' AND source = '~/api/client'" + }, + { + "id": "dependencies-from-consumer", + "prompt": "What files does src/consumer.ts depend on (resolved edges)?", + "sql": "SELECT from_path, to_path FROM dependencies WHERE from_path = 'src/consumer.ts' ORDER BY to_path" + }, + { + "id": "markers-notes-todo", + "prompt": "TODO marker in fixture notes markdown", + "sql": "SELECT file_path, line_number, kind, content FROM markers WHERE file_path = 'src/notes.md'" + }, + { + "id": "exports-client", + "prompt": "What does src/api/client.ts export?", + "sql": "SELECT name, kind FROM exports WHERE file_path = 'src/api/client.ts'" + }, + { + "id": "css-variables", + "prompt": "Which CSS custom properties (design tokens) are defined?", + "sql": "SELECT name, value FROM css_variables ORDER BY name" + }, + { + "id": "css-classes-module", + "prompt": "Which CSS classes exist and are any from CSS modules?", + "sql": "SELECT name, file_path, is_module FROM css_classes ORDER BY name" + }, + { + "id": "css-keyframes", + "prompt": "Which @keyframes animations are defined?", + "sql": "SELECT name, file_path FROM css_keyframes" + }, + { + "id": "css-imports", + "prompt": "Which CSS files import other stylesheets?", + "sql": "SELECT file_path, source FROM imports WHERE file_path LIKE '%.css'" + }, + { + "id": "markers-all-kinds", + "prompt": "How many markers of each kind exist?", + "sql": "SELECT kind, COUNT(*) as n FROM markers GROUP BY kind ORDER BY kind" + }, + { + 
"id": "components-no-false-positives", + "prompt": "Which components are detected (FormatPrice should not appear)?", + "sql": "SELECT name FROM components ORDER BY name" + }, + { + "id": "type-members-client-config", + "prompt": "What fields does ClientConfig have?", + "sql": "SELECT name, type, is_optional, is_readonly FROM type_members WHERE symbol_name = 'ClientConfig' ORDER BY name" + }, + { + "id": "calls-consumer", + "prompt": "What does the run function in consumer.ts call?", + "sql": "SELECT DISTINCT callee_name FROM calls WHERE caller_name = 'run' AND file_path = 'src/consumer.ts' ORDER BY callee_name" + }, + { + "id": "deprecated-symbols", + "prompt": "Which symbols are flagged @deprecated in JSDoc?", + "recipe": "deprecated-symbols" + }, + { + "id": "visibility-tags", + "prompt": "Which symbols are tagged @internal / @private / @alpha / @beta?", + "recipe": "visibility-tags" + }, + { + "id": "files-hashes", + "prompt": "All indexed files with content_hash (recipe powers `codemap validate`)", + "recipe": "files-hashes" + }, + { + "id": "barrel-files", + "prompt": "Top files by export count (barrel candidates)", + "recipe": "barrel-files" + }, + { + "id": "coverage-rows-after-ingest", + "prompt": "Coverage rows after ingesting fixtures/minimal/coverage/coverage-final.json", + "sql": "SELECT file_path, name, hit_statements, total_statements, coverage_pct FROM coverage ORDER BY file_path, line_start" + }, + { + "id": "untested-and-dead", + "prompt": "Exported functions with no callers AND zero coverage (the killer recipe)", + "recipe": "untested-and-dead" + }, + { + "id": "files-by-coverage", + "prompt": "Files ranked ascending by statement coverage", + "recipe": "files-by-coverage" + }, + { + "id": "worst-covered-exports", + "prompt": "Top 20 worst-covered exported functions", + "recipe": "worst-covered-exports" + } + ] +} diff --git a/fixtures/minimal/README.md b/fixtures/minimal/README.md index f8c90c2..a08e4df 100644 --- a/fixtures/minimal/README.md +++ 
b/fixtures/minimal/README.md @@ -21,6 +21,7 @@ Stable tree exercising every codemap surface — used by `src/benchmark.ts`, gol | `--group-by owner` | `CODEOWNERS` (4 owners) | | Project-local recipes | `.codemap/recipes/shop-symbols.{sql,md}` (with frontmatter actions) — file shape valid; loader currently runs at parse time before bootstrap, so `--recipe shop-symbols` is rejected as "unknown" until that's deferred to the runner (known limitation) | | Self-managed `.gitignore` | `.codemap/.gitignore` (codemap-managed) | +| `coverage` (Istanbul + LCOV ingest) | `coverage/coverage-final.json` (Istanbul) + `coverage/lcov.info` (LCOV) — equivalent partial coverage shape; bundled recipes `untested-and-dead`, `files-by-coverage`, `worst-covered-exports` exercise the join axis against `@deprecated` symbols | ## Use @@ -35,6 +36,14 @@ CODEMAP_ROOT="$(pwd)/fixtures/minimal" bun run benchmark # the "Project-local recipes" row above; will work once recipe loading is # deferred past bootstrap) CODEMAP_ROOT="$(pwd)/fixtures/minimal" bun src/index.ts query --recipe shop-symbols --json + +# Static coverage ingest — Istanbul (every modern JS test runner that emits +# coverage-final.json) or LCOV (e.g. `bun test --coverage`). Format auto-detected. +CODEMAP_ROOT="$(pwd)/fixtures/minimal" bun src/index.ts ingest-coverage coverage/coverage-final.json +CODEMAP_ROOT="$(pwd)/fixtures/minimal" bun src/index.ts ingest-coverage coverage/lcov.info + +# After ingest — the killer recipe (exported + no callers + zero coverage) +CODEMAP_ROOT="$(pwd)/fixtures/minimal" bun src/index.ts query --recipe untested-and-dead --json ``` **Editor / `tsc`:** run `bun install` here so `react` + `@types/react` resolve `react/jsx-runtime` for `.tsx` (`jsx: "react-jsx"` in `tsconfig.json`). 
diff --git a/fixtures/minimal/coverage/coverage-final.json b/fixtures/minimal/coverage/coverage-final.json new file mode 100644 index 0000000..33cfd3f --- /dev/null +++ b/fixtures/minimal/coverage/coverage-final.json @@ -0,0 +1,51 @@ +{ + "src/usePermissions.ts": { + "path": "src/usePermissions.ts", + "statementMap": { + "0": { "start": { "line": 2, "column": 0 }, "end": { "line": 2, "column": 1 } } + }, + "s": { "0": 4 } + }, + "src/api/client.ts": { + "path": "src/api/client.ts", + "statementMap": { + "0": { "start": { "line": 42, "column": 0 }, "end": { "line": 42, "column": 1 } } + }, + "s": { "0": 0 } + }, + "src/utils/format.ts": { + "path": "src/utils/format.ts", + "statementMap": { + "0": { "start": { "line": 6, "column": 0 }, "end": { "line": 6, "column": 1 } }, + "1": { "start": { "line": 14, "column": 0 }, "end": { "line": 14, "column": 1 } } + }, + "s": { "0": 0, "1": 1 } + }, + "src/utils/date.ts": { + "path": "src/utils/date.ts", + "statementMap": { + "0": { "start": { "line": 6, "column": 0 }, "end": { "line": 6, "column": 1 } }, + "1": { "start": { "line": 13, "column": 0 }, "end": { "line": 13, "column": 1 } }, + "2": { "start": { "line": 20, "column": 0 }, "end": { "line": 20, "column": 1 } } + }, + "s": { "0": 1, "1": 0, "2": 0 } + }, + "src/components/shop/ProductCard.tsx": { + "path": "src/components/shop/ProductCard.tsx", + "statementMap": { + "0": { "start": { "line": 13, "column": 0 }, "end": { "line": 13, "column": 1 } }, + "1": { "start": { "line": 14, "column": 0 }, "end": { "line": 14, "column": 1 } }, + "2": { "start": { "line": 16, "column": 0 }, "end": { "line": 16, "column": 1 } } + }, + "s": { "0": 1, "1": 1, "2": 1 } + }, + "src/components/shop/ShopButton.tsx": { + "path": "src/components/shop/ShopButton.tsx", + "statementMap": { + "0": { "start": { "line": 4, "column": 0 }, "end": { "line": 4, "column": 1 } }, + "1": { "start": { "line": 9, "column": 0 }, "end": { "line": 9, "column": 1 } }, + "2": { "start": { "line": 10, 
"column": 0 }, "end": { "line": 10, "column": 1 } } + }, + "s": { "0": 0, "1": 1, "2": 1 } + } +} diff --git a/fixtures/minimal/coverage/lcov.info b/fixtures/minimal/coverage/lcov.info new file mode 100644 index 0000000..c64bb5d --- /dev/null +++ b/fixtures/minimal/coverage/lcov.info @@ -0,0 +1,43 @@ +TN: +SF:src/usePermissions.ts +DA:2,4 +LF:1 +LH:1 +end_of_record +TN: +SF:src/api/client.ts +DA:42,0 +LF:1 +LH:0 +end_of_record +TN: +SF:src/utils/format.ts +DA:6,0 +DA:14,1 +LF:2 +LH:1 +end_of_record +TN: +SF:src/utils/date.ts +DA:6,1 +DA:13,0 +DA:20,0 +LF:3 +LH:1 +end_of_record +TN: +SF:src/components/shop/ProductCard.tsx +DA:13,1 +DA:14,1 +DA:16,1 +LF:3 +LH:3 +end_of_record +TN: +SF:src/components/shop/ShopButton.tsx +DA:4,0 +DA:9,1 +DA:10,1 +LF:3 +LH:2 +end_of_record diff --git a/scripts/query-golden.ts b/scripts/query-golden.ts index b6e498d..fa3179e 100644 --- a/scripts/query-golden.ts +++ b/scripts/query-golden.ts @@ -4,9 +4,15 @@ import { dirname, join, resolve } from "node:path"; import { fileURLToPath } from "node:url"; import { createCodemap } from "../src/api"; +import { ingestIstanbul, ingestLcov } from "../src/application/coverage-engine"; import { getQueryRecipeSql } from "../src/application/query-recipes"; +import { closeDb, openDb } from "../src/db"; import { parseScenariosJson } from "./query-golden/schema"; -import type { GoldenMatch, GoldenScenario } from "./query-golden/schema"; +import type { + GoldenMatch, + GoldenScenario, + GoldenSetupStep, +} from "./query-golden/schema"; const REPO_ROOT = join(dirname(fileURLToPath(import.meta.url)), ".."); @@ -92,6 +98,46 @@ function defaultMatch(s: GoldenScenario): GoldenMatch { return s.match ?? { kind: "exact" }; } +/** + * Run one-time setup steps after the corpus is indexed and before the first + * scenario. Today: `ingest-coverage` (Istanbul / LCOV — auto-detected by + * extension, mirrors the CLI verb). Extend the dispatch as more one-shot + * ingest verbs land. 
+ */ +function runSetup(steps: GoldenSetupStep[], fixtureRoot: string): void { + const db = openDb(); + try { + for (const step of steps) { + if (step.kind !== "ingest-coverage") continue; + const absPath = resolve(fixtureRoot, step.path); + if (absPath.endsWith(".json")) { + const payload = JSON.parse( + readFileSync(absPath, "utf-8"), + ) as Parameters[0]["payload"]; + ingestIstanbul({ + db, + projectRoot: fixtureRoot, + payload, + sourcePath: absPath, + }); + } else if (absPath.endsWith(".info")) { + ingestLcov({ + db, + projectRoot: fixtureRoot, + payload: readFileSync(absPath, "utf-8"), + sourcePath: absPath, + }); + } else { + throw new Error( + `query-golden setup: cannot auto-detect coverage format from ${absPath}`, + ); + } + } + } finally { + closeDb(db); + } +} + function evaluateMatch( rows: unknown[], match: GoldenMatch, @@ -160,12 +206,13 @@ async function main(): Promise { } const raw = readFileSync(scenariosFile, "utf-8"); - const scenarios = parseScenariosJson(raw); + const { setup, scenarios } = parseScenariosJson(raw); mkdirSync(goldenDir, { recursive: true }); const cm = await createCodemap({ root: fixtureRoot }); await cm.index({ mode: "full", quiet: true }); + if (setup.length > 0) runSetup(setup, fixtureRoot); const modeLabel = UPDATE ? "--update" : "compare"; const corpusLabel = argv.corpus; diff --git a/scripts/query-golden/schema.ts b/scripts/query-golden/schema.ts index a0b69b9..0b8aacc 100644 --- a/scripts/query-golden/schema.ts +++ b/scripts/query-golden/schema.ts @@ -41,9 +41,38 @@ export const scenarioSchema = z export type GoldenScenario = z.infer; -export const scenariosFileSchema = z.array(scenarioSchema); +/** + * One-time setup step run after `cm.index()` and before the first scenario. + * Currently only `ingest-coverage` (Istanbul / LCOV); extend the union as + * other one-shot ingest verbs land. + */ +export const setupStepSchema = z.object({ + kind: z.literal("ingest-coverage"), + /** Path relative to the fixture root (e.g. 
`coverage/coverage-final.json`). */ + path: z.string().min(1), +}); + +export type GoldenSetupStep = z.infer; + +const legacyArraySchema = z.array(scenarioSchema); +const objectShapeSchema = z.object({ + setup: z.array(setupStepSchema).optional(), + scenarios: z.array(scenarioSchema), +}); + +export const scenariosFileSchema = z.union([ + legacyArraySchema, + objectShapeSchema, +]); + +export interface ParsedScenariosFile { + setup: GoldenSetupStep[]; + scenarios: GoldenScenario[]; +} -export function parseScenariosJson(raw: string): GoldenScenario[] { +export function parseScenariosJson(raw: string): ParsedScenariosFile { const data: unknown = JSON.parse(raw); - return scenariosFileSchema.parse(data); + const parsed = scenariosFileSchema.parse(data); + if (Array.isArray(parsed)) return { setup: [], scenarios: parsed }; + return { setup: parsed.setup ?? [], scenarios: parsed.scenarios }; } diff --git a/templates/recipes/files-by-coverage.md b/templates/recipes/files-by-coverage.md new file mode 100644 index 0000000..a802ad3 --- /dev/null +++ b/templates/recipes/files-by-coverage.md @@ -0,0 +1,14 @@ +--- +actions: + - type: prioritise-test-targets + auto_fixable: false + description: "Files ranked by ascending statement coverage. Lowest-coverage files first — natural backlog for test-writing effort." +--- + +Files ranked ascending by statement coverage — the "what should we test next?" list. + +Aggregates the symbol-level `coverage` table by `file_path` (no separate `file_coverage` rollup table in v1 per D2 of `docs/plans/coverage-ingestion.md`; the GROUP BY is index-bounded by `idx_coverage_file_name`). + +`coverage_pct` is `NULL` when a file has zero testable statements (empty modules, type-only files, interface declarations) — sorted last so they don't drown out actual zero-coverage files. + +Empty until you run `codemap ingest-coverage <path>`.
diff --git a/templates/recipes/files-by-coverage.sql b/templates/recipes/files-by-coverage.sql new file mode 100644 index 0000000..64bf41f --- /dev/null +++ b/templates/recipes/files-by-coverage.sql @@ -0,0 +1,12 @@ +SELECT + file_path, + SUM(hit_statements) AS hit_statements, + SUM(total_statements) AS total_statements, + CASE + WHEN SUM(total_statements) = 0 THEN NULL + ELSE ROUND(100.0 * SUM(hit_statements) / SUM(total_statements), 2) + END AS coverage_pct +FROM coverage +GROUP BY file_path +ORDER BY coverage_pct ASC NULLS LAST, file_path ASC +LIMIT 100 diff --git a/templates/recipes/untested-and-dead.md b/templates/recipes/untested-and-dead.md new file mode 100644 index 0000000..668a6da --- /dev/null +++ b/templates/recipes/untested-and-dead.md @@ -0,0 +1,21 @@ +--- +actions: + - type: review-for-deletion + auto_fixable: false + description: "Exported function with zero callers AND zero test coverage — strong dead-code candidate. Verify against framework convention exports (Next.js page.tsx default exports, Storybook stories, vite.config.ts) before deleting; codemap doesn't model framework entry-points yet." +--- + +Exported functions that look structurally dead AND aren't covered by tests — the high-confidence "dead code" predicate. + +Combines two evidence axes: + +1. **Structural**: `is_exported = 1` AND no row in `calls` with `callee_name = s.name`. +2. **Runtime**: no `coverage` row OR `coverage_pct = 0` (treats both as "untested" via `COALESCE(c.coverage_pct, 0) = 0`). + +Returns nothing useful until you've run `codemap ingest-coverage <path>` (Istanbul or LCOV from any test runner). Without coverage data, every uncalled exported function appears — coverage is what makes the predicate precise.
+ +**Known v1 limitation (D11 of `docs/plans/coverage-ingestion.md`):** the `callee_name = s.name` predicate is name-only / lossy across cross-file collisions — two functions named `init` in different files will satisfy "no callers" together as long as neither is called _anywhere_. Three concrete narrowing patterns to apply on top of this recipe when the noise is high: + +1. **Scope by directory**: `AND s.file_path LIKE 'src/api/%'` — restricts the predicate to a single owner / package. +2. **Exclude framework entry-point exports**: `AND s.is_default_export = 0` — Next.js page / layout / route handler default exports show up as "no callers" but are live entry points. +3. **Restrict to a kind / visibility**: already filters `kind = 'function'`; add `AND (s.visibility IS NULL OR s.visibility = 'public')` to skip `@internal` / `@beta` symbols whose lifecycle you don't yet own. diff --git a/templates/recipes/untested-and-dead.sql b/templates/recipes/untested-and-dead.sql new file mode 100644 index 0000000..a763613 --- /dev/null +++ b/templates/recipes/untested-and-dead.sql @@ -0,0 +1,16 @@ +SELECT + s.name, + s.file_path, + s.line_start, + COALESCE(c.coverage_pct, 0) AS coverage_pct +FROM symbols s +LEFT JOIN coverage c + ON c.file_path = s.file_path + AND c.name = s.name + AND c.line_start = s.line_start +WHERE s.kind = 'function' + AND s.is_exported = 1 + AND NOT EXISTS (SELECT 1 FROM calls WHERE callee_name = s.name) + AND COALESCE(c.coverage_pct, 0) = 0 +ORDER BY s.file_path ASC, s.line_start ASC +LIMIT 100 diff --git a/templates/recipes/worst-covered-exports.md b/templates/recipes/worst-covered-exports.md new file mode 100644 index 0000000..4b16cac --- /dev/null +++ b/templates/recipes/worst-covered-exports.md @@ -0,0 +1,14 @@ +--- +actions: + - type: add-test-suite + auto_fixable: false + description: "Exported function with low or no coverage. If callers exist (use the `calls` table to check), refactor risk goes up with every change."
+--- + +Top 20 worst-covered exported functions — high-leverage test-writing targets. + +`COALESCE(c.coverage_pct, 0)` treats "no coverage row" and "0% coverage" identically. To distinguish them (e.g. if you want to know which uncovered symbols have _some_ callers), join `calls` on `callee_name = s.name`. + +Pair with [`untested-and-dead`](./untested-and-dead.md) to split the result set: low-coverage symbols _with_ callers are refactor-risk; low-coverage symbols _without_ callers are dead-code candidates. + +Every row reports `coverage_pct = 0` until you run `codemap ingest-coverage <path>`. diff --git a/templates/recipes/worst-covered-exports.sql b/templates/recipes/worst-covered-exports.sql new file mode 100644 index 0000000..0cfed51 --- /dev/null +++ b/templates/recipes/worst-covered-exports.sql @@ -0,0 +1,14 @@ +SELECT + s.name, + s.file_path, + s.line_start, + COALESCE(c.coverage_pct, 0) AS coverage_pct +FROM symbols s +LEFT JOIN coverage c + ON c.file_path = s.file_path + AND c.name = s.name + AND c.line_start = s.line_start +WHERE s.is_exported = 1 + AND s.kind = 'function' +ORDER BY coverage_pct ASC, s.file_path ASC, s.line_start ASC +LIMIT 20 From 9e3871388fea7cfd5641f3bb1b1558563ff1fbed Mon Sep 17 00:00:00 2001 From: Sutu Sebastian Date: Mon, 4 May 2026 11:29:47 +0300 Subject: [PATCH 06/10] docs(coverage): architecture + glossary + agent rule/skill lockstep + changeset (Tracer 5) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes the plan execution per docs/README.md Rule 10 (agent rule + skill lockstep) and Rule 9 (new domain nouns → glossary same-PR). docs/architecture.md: - application/ list: add coverage-engine.ts (upsertCoverageRows core + ingestIstanbul / ingestLcov parsers). - New § coverage table under Schema with the natural-key PK rationale cross-referencing query_baselines as the dropAll() exclusion precedent. docs/glossary.md: - New `coverage` table entry: PK shape, NULL semantics, orphan cleanup.
- New `codemap ingest-coverage` / Istanbul JSON / LCOV / static coverage ingestion entry: format auto-detection, innermost-wins projection rationale (D7), three bundled recipes, no-half-way principle. .agents/rules/codemap.md + templates/agents/rules/codemap.md (lockstep): - Index intro mentions coverage as part of indexed structure. - New CLI table row for `ingest-coverage`. - Four new trigger-pattern rows: "Is symbol X tested?" → coverage table; "What's structurally dead AND untested?" → --recipe untested-and-dead; "Rank files by test coverage" → --recipe files-by-coverage; "Worst-covered exported functions" → --recipe worst-covered-exports. - Two new quick-reference query rows: symbol coverage + bundled recipe. - Drift between .agents/ and templates/agents/ stays CLI-prefix-only (`bun src/index.ts` vs `codemap`). .agents/skills/codemap/SKILL.md + templates/agents/skills/codemap/SKILL.md (lockstep): new `coverage` table block with full column schema + the three meta keys. docs/research/fallow.md: - C.11 row marked Shipped (link to plan PR + this commit). - Open question "column on symbols vs separate table?" resolved to "separate table" with D1 + D6 cross-reference. docs/roadmap.md: drop the "Static coverage ingestion" backlog row (per Rule 2 — when a backlog item ships, it leaves the roadmap). docs/plans/coverage-ingestion.md: deleted per Rule 3 (plan files have the lifetime of the work; absorbed into architecture / glossary / agent rule on ship). .changeset/coverage-ingestion.md: minor changeset (per .agents/lessons.md "changesets bump policy" — new tables + SCHEMA_VERSION bump = minor). All 24 golden scenarios pass (5 new — coverage-rows-after-ingest, untested-and-dead, files-by-coverage, worst-covered-exports — plus the existing 19). 737 tests across 43 files, 0 fail. 
--- .agents/rules/codemap.md | 9 +- .agents/skills/codemap/SKILL.md | 15 ++ .changeset/coverage-ingestion.md | 36 +++++ docs/architecture.md | 73 ++++++---- docs/glossary.md | 19 +++ docs/plans/coverage-ingestion.md | 169 ----------------------- docs/research/fallow.md | 4 +- docs/roadmap.md | 1 - templates/agents/rules/codemap.md | 9 +- templates/agents/skills/codemap/SKILL.md | 15 ++ 10 files changed, 149 insertions(+), 201 deletions(-) create mode 100644 .changeset/coverage-ingestion.md delete mode 100644 docs/plans/coverage-ingestion.md diff --git a/.agents/rules/codemap.md b/.agents/rules/codemap.md index a23562f..34970af 100644 --- a/.agents/rules/codemap.md +++ b/.agents/rules/codemap.md @@ -6,7 +6,7 @@ alwaysApply: true > **STOP.** Before you call Grep, Glob, SemanticSearch, or Read to answer a **structural** question about this repository — query the Codemap SQLite index first. This is not optional when the question matches a trigger pattern below. -A local database (default **`.codemap/index.db`**) indexes structure: symbols, imports, exports, components, dependencies, markers, CSS variables, CSS classes, CSS keyframes. The `.codemap/` directory holds every codemap-managed file (`index.db` + WAL/SHM, `audit-cache/`, project `recipes/`, `config.{ts,js,json}`, self-managed `.gitignore`); override the dir with `--state-dir ` or `CODEMAP_STATE_DIR`. The `.codemap/.gitignore` is **codemap-managed and reconciled on every boot** (`ensureStateGitignore`) — bumping its canonical body in a PR auto-applies on every consumer's next run. +A local database (default **`.codemap/index.db`**) indexes structure: symbols, imports, exports, components, dependencies, markers, CSS variables, CSS classes, CSS keyframes, and (after `bun src/index.ts ingest-coverage `) static coverage from Istanbul JSON or LCOV. 
The `.codemap/` directory holds every codemap-managed file (`index.db` + WAL/SHM, `audit-cache/`, project `recipes/`, `config.{ts,js,json}`, self-managed `.gitignore`); override the dir with `--state-dir ` or `CODEMAP_STATE_DIR`. The `.codemap/.gitignore` is **codemap-managed and reconciled on every boot** (`ensureStateGitignore`) — bumping its canonical body in a PR auto-applies on every consumer's next run. **This file** is for **developing Codemap** in this clone. **End users** of the published package get the agent rule from **`templates/agents/`** (via **`codemap agents init`**). **Generic defaults:** SQL and triggers stay project-agnostic — **edit** this rule for repo-specific paths and queries. @@ -32,6 +32,7 @@ A local database (default **`.codemap/index.db`**) indexes structure: symbols, i | Targeted read (metadata) | — | `bun src/index.ts show [--kind ] [--in ] [--json]` — file:line + signature | | Targeted read (source text) | — | `bun src/index.ts snippet [--kind ] [--in ] [--json]` — same lookup + source from disk + stale flag | | Impact (blast-radius walker) | — | `bun src/index.ts impact [--direction up\|down\|both] [--depth N] [--via ] [--limit N] [--summary] [--json]` — replaces hand-composed `WITH RECURSIVE` queries | +| Coverage ingest | — | `bun src/index.ts ingest-coverage [--json]` — Istanbul (`coverage-final.json`) or LCOV (`lcov.info`); format auto-detected. Joinable to `symbols` for "untested AND dead" queries. | | SARIF / GH annotations | — | `bun src/index.ts query --recipe deprecated-symbols --format sarif` · `… --format annotations` | **Recipe `actions`:** with **`--json`**, recipes that define an `actions` template append it to every row (kebab-case verb + description — e.g. `fan-out` → `review-coupling`). Under `--baseline`, actions attach to the **`added`** rows only. Inspect via **`--recipes-json`**. Ad-hoc SQL never carries actions. 
@@ -109,6 +110,10 @@ If the question looks like any of these → use the index: | "Is symbol X deprecated?" / "What does X do?" | `symbols` (`doc_comment`) | | "What's `@internal` / `@beta` / `@alpha` / `@private`?" | `symbols.visibility` (parsed JSDoc tag — not regex) | | "Who calls X?" / "What does X call?" | `calls` | +| "Is symbol X tested?" / "What's the coverage of file Y?" | `coverage` (after `ingest-coverage`) | +| "What's structurally dead AND untested?" | `--recipe untested-and-dead` | +| "Rank files by test coverage" | `--recipe files-by-coverage` | +| "Worst-covered exported functions" | `--recipe worst-covered-exports` | ## When Grep / Read IS appropriate @@ -156,6 +161,8 @@ bun src/index.ts query --json "" | Who calls X? | `SELECT DISTINCT caller_name, file_path FROM calls WHERE callee_name = '...'` | | What does X call? | `SELECT DISTINCT callee_name FROM calls WHERE caller_name = '...'` | | Call hotspots | `SELECT callee_name, COUNT(*) as fan_in FROM calls GROUP BY callee_name ORDER BY fan_in DESC LIMIT 10` | +| Symbol coverage | `SELECT name, hit_statements, total_statements, coverage_pct FROM coverage WHERE file_path = '...'` | +| Untested + dead exports | `bun src/index.ts query --json --recipe untested-and-dead` | **Use `DISTINCT`** on dependency and import queries — a file importing multiple specifiers from the same module produces duplicate rows. diff --git a/.agents/skills/codemap/SKILL.md b/.agents/skills/codemap/SKILL.md index b78ab04..fff77c5 100644 --- a/.agents/skills/codemap/SKILL.md +++ b/.agents/skills/codemap/SKILL.md @@ -267,6 +267,21 @@ User-facing baselines saved by `codemap query --save-baseline`, replayed by `cod | git_ref | TEXT | `git rev-parse HEAD` at save time, or NULL when not a git working tree | | created_at | INTEGER | `Date.now()` at save time (epoch ms) | +### `coverage` — Statement coverage (user data, ingested via `codemap ingest-coverage`) + +Static coverage from Istanbul JSON or LCOV. 
Joinable to `symbols` for "what's untested?" queries. **Survives `--full` and SCHEMA bumps** — intentionally absent from `dropAll()`. Empty until first ingest. + +| Column | Type | Description | +| ---------------- | ------- | -------------------------------------------------------------------------------------------------------- | +| file_path | TEXT PK | Project-relative path; matches `symbols.file_path`. Forward-slashed (Windows paths normalised on ingest) | +| name | TEXT PK | Symbol name (matches `symbols.name`). Same `(file_path, name, line_start)` is unique by construction | +| line_start | INT PK | Symbol's starting line (matches `symbols.line_start`). Disambiguates re-declared names | +| coverage_pct | REAL | Percentage 0.0–100.0; `NULL` when `total_statements = 0` (zero-statement scope; not the same as 0%) | +| hit_statements | INTEGER | Count of statements with non-zero hit count after innermost-wins projection | +| total_statements | INTEGER | Count of statements that projected onto this symbol | + +Three meta keys (`coverage_last_ingested_at` / `_path` / `_format`) record freshness — single ingest at a time, format is meta-level. + ## Query patterns ### Basic lookups diff --git a/.changeset/coverage-ingestion.md b/.changeset/coverage-ingestion.md new file mode 100644 index 0000000..860cb71 --- /dev/null +++ b/.changeset/coverage-ingestion.md @@ -0,0 +1,36 @@ +--- +"@stainless-code/codemap": minor +--- + +`codemap ingest-coverage ` — static coverage ingestion. Reads Istanbul JSON (`coverage-final.json`) or LCOV (`lcov.info`) into a new `coverage` table joinable to `symbols`, so structural queries can compose coverage filters in pure SQL — no runtime tracer, no paid coverage stack. + +**Both formats land in v1** (Istanbul + LCOV) so every test runner is a first-class consumer on day one — `vitest --coverage`, `jest --coverage`, `c8`, `nyc` (Istanbul JSON), and `bun test --coverage` (LCOV) all work without waiting on a follow-up release. 
+ +**Bundled recipes (auto-discovered, no opt-in needed):** + +- `untested-and-dead` — exported functions with no callers AND zero coverage; the killer recipe combining structural and runtime evidence axes. +- `files-by-coverage` — files ranked ascending by statement coverage. +- `worst-covered-exports` — top-20 worst-covered exported functions. + +Each recipe ships a frontmatter `actions` block so agents see per-row follow-up hints in `--json` output. + +**Schema:** + +- New `coverage` table with natural-key PK `(file_path, name, line_start)` — intentionally not a FK to `symbols.id` so coverage rows survive the `symbols` drop-recreate cycle on every `--full` reindex. +- `idx_coverage_file_name` covers the typical join shape and the `GROUP BY file_path` scan used by the `files-by-coverage` recipe. +- Three new `meta` keys (`coverage_last_ingested_at` / `_path` / `_format`) record ingest freshness. +- `SCHEMA_VERSION` 5 → 6 — auto-rebuilds on next `codemap` run; the new table is empty until first `ingest-coverage` invocation. Subsequent bumps preserve coverage data via the `dropAll()` exclusion. + +**CLI:** + +```bash +codemap ingest-coverage coverage/coverage-final.json # Istanbul (auto-detected) +codemap ingest-coverage coverage/lcov.info # LCOV (auto-detected) +codemap ingest-coverage coverage --json # directory probe (errors if both files present) + +codemap query --json --recipe untested-and-dead # the killer query +``` + +No `--source` flag — format is auto-detected from extension. No MCP / HTTP transport in v1 — coverage exposes as a SQL column, composable with every existing recipe and ad-hoc query through the existing `query` / `query_recipe` tools (no parallel surface). + +Plan: PR #56 (merged). Implementation: this PR. 
diff --git a/docs/architecture.md b/docs/architecture.md index dd3e93a..9a14375 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -16,13 +16,13 @@ A local SQLite database (`.codemap/index.db`) indexes the project tree and store ## Layering -| Layer | Role | -| -------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| **`cli/`** (`bootstrap`, `main`, `cmd-*`) | Parses argv; **dynamic `import()`** loads only the command chunk (`cmd-index`, `cmd-query`, `cmd-agents`) so `--help` / `version` / `agents init` avoid the indexer. | -| **`api.ts`** | Public programmatic surface: `createCodemap()`, `Codemap` (`query`, `index`), re-exports `runCodemapIndex` for advanced use. 
| -| **`application/`** | Pure transport-agnostic engines + handlers: `run-index.ts` / `index-engine.ts` (orchestration + indexing); `query-engine.ts` (`executeQuery` / `executeQueryBatch`); `audit-engine.ts` (`runAudit` + `resolveAuditBaselines` + `runAuditFromRef` + `makeWorktreeReindex`); `audit-worktree.ts` (sha-keyed cache + atomic populate); `context-engine.ts` (`buildContextEnvelope`); `validate-engine.ts` (`computeValidateRows` + `toProjectRelative`); `show-engine.ts` (lookup + envelope builders); `impact-engine.ts` (`findImpact` — graph blast-radius walker); `query-recipes.ts` + `recipes-loader.ts` (recipe registry); `output-formatters.ts` (SARIF + GH annotations); `watcher.ts` (chokidar-backed debounced reindex; pure helpers + injectable backend); `tool-handlers.ts` + `resource-handlers.ts` (transport-agnostic tool / resource handlers shared by MCP + HTTP); `mcp-server.ts` (MCP transport — stdio); `http-server.ts` (HTTP transport — `node:http`). Engines depend on `db.ts` / `runtime.ts`; **never** on `cli/`. | -| **`adapters/`** | `LanguageAdapter` registry; built-ins call `parser.ts` / `css-parser.ts` / `markers.ts` from `parse-worker-core`. | -| **`runtime.ts` / `config.ts` / `db.ts` / …** | Config, SQLite, resolver, workers. 
| +| Layer | Role | +| -------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| **`cli/`** (`bootstrap`, `main`, `cmd-*`) | Parses argv; **dynamic `import()`** loads only the command chunk (`cmd-index`, `cmd-query`, `cmd-agents`) so `--help` / `version` / `agents init` avoid the indexer. | +| **`api.ts`** | Public programmatic surface: `createCodemap()`, `Codemap` (`query`, `index`), re-exports `runCodemapIndex` for advanced use. 
| +| **`application/`** | Pure transport-agnostic engines + handlers: `run-index.ts` / `index-engine.ts` (orchestration + indexing); `query-engine.ts` (`executeQuery` / `executeQueryBatch`); `audit-engine.ts` (`runAudit` + `resolveAuditBaselines` + `runAuditFromRef` + `makeWorktreeReindex`); `audit-worktree.ts` (sha-keyed cache + atomic populate); `context-engine.ts` (`buildContextEnvelope`); `validate-engine.ts` (`computeValidateRows` + `toProjectRelative`); `show-engine.ts` (lookup + envelope builders); `impact-engine.ts` (`findImpact` — graph blast-radius walker); `coverage-engine.ts` (`upsertCoverageRows` core + `ingestIstanbul` / `ingestLcov` parsers — see [§ Static coverage ingestion](#static-coverage-ingestion)); `query-recipes.ts` + `recipes-loader.ts` (recipe registry); `output-formatters.ts` (SARIF + GH annotations); `watcher.ts` (chokidar-backed debounced reindex; pure helpers + injectable backend); `tool-handlers.ts` + `resource-handlers.ts` (transport-agnostic tool / resource handlers shared by MCP + HTTP); `mcp-server.ts` (MCP transport — stdio); `http-server.ts` (HTTP transport — `node:http`). Engines depend on `db.ts` / `runtime.ts`; **never** on `cli/`. | +| **`adapters/`** | `LanguageAdapter` registry; built-ins call `parser.ts` / `css-parser.ts` / `markers.ts` from `parse-worker-core`. | +| **`runtime.ts` / `config.ts` / `db.ts` / …** | Config, SQLite, resolver, workers. | `index.ts` is the package entry: re-exports the public API and runs `cli/main` only when executed as the main module (Node/Bun `codemap` binary). 
@@ -92,26 +92,26 @@ A local SQLite database (`.codemap/index.db`) indexes the project tree and store ## Key Files -| File | Purpose | -| ------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `index.ts` | Package entry — re-exports `api` / `config`, runs CLI when main | -| `cli/` | CLI — bootstrap argv, lazy command modules, `query` / `validate` / `context` / `agents init` / index modes | -| `api.ts` | Programmatic API — `createCodemap`, `Codemap`, `runCodemapIndex` | -| `application/` | Pure transport-agnostic engines (`run-index`, `index-engine`, `query-engine`, `audit-engine`, `context-engine`, `validate-engine`, `show-engine`, `impact-engine`, `query-recipes`, `recipes-loader`, `mcp-server`, `http-server`, `watcher`) | -| `worker-pool.ts` | Parallel parse workers (Bun / Node) | -| `db.ts` | SQLite adapter — schema DDL, typed CRUD, connection management | -| `parser.ts` | TS/TSX/JS/JSX extraction via `oxc-parser` — symbols (with JSDoc + generics + return types), type members, imports, exports, components, markers | -| `css-parser.ts` | CSS extraction via `lightningcss` — custom properties, classes, keyframes, `@theme` blocks | -| `resolver.ts` | Import path resolution via `oxc-resolver` — respects `tsconfig` aliases, builds dependency graph | -| `constants.ts` | Shared constants — e.g. 
`LANG_MAP` | -| `glob-sync.ts` | Include globs — Bun `Glob` vs `tinyglobby` on Node ([packaging § Node vs Bun](./packaging.md#node-vs-bun)) | -| `markers.ts` | Shared marker extraction (`TODO`/`FIXME`/`HACK`/`NOTE`) — used by all parsers | -| `parse-worker.ts` | Worker thread entry point — reads, parses, and extracts file data in parallel | -| `adapters/` | `LanguageAdapter` types and built-in TS/CSS/text implementations | -| `parsed-types.ts` | Shared `ParsedFile` shape for workers and adapters | -| `agents-init.ts` / `agents-init-interactive.ts` | `codemap agents init` — see [agents.md](./agents.md) (granular template + IDE writes, pointer upsert, **`--interactive`**, `.gitignore`) | -| `benchmark.ts` (+ `benchmark-default-scenarios.ts`, `benchmark-config.ts`, `benchmark-common.ts`) | SQL vs traditional timing; optional **`CODEMAP_BENCHMARK_CONFIG`** JSON — [benchmark.md § Custom scenarios](./benchmark.md#custom-scenarios-codemap_benchmark_config) | -| `config.ts` | `/config.{ts,js,json}` load path, **Zod** user schema (`codemapUserConfigSchema`), `resolveCodemapConfig` | +| File | Purpose | +| ------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `index.ts` | Package entry — re-exports `api` / `config`, runs CLI when main | +| `cli/` | CLI — bootstrap argv, lazy command modules, `query` / `validate` / `context` / `agents init` / index modes | +| `api.ts` | Programmatic API — `createCodemap`, `Codemap`, `runCodemapIndex` | +| `application/` | Pure transport-agnostic engines (`run-index`, `index-engine`, `query-engine`, `audit-engine`, `context-engine`, `validate-engine`, `show-engine`, `impact-engine`, `coverage-engine`, `query-recipes`, `recipes-loader`, 
`mcp-server`, `http-server`, `watcher`) | +| `worker-pool.ts` | Parallel parse workers (Bun / Node) | +| `db.ts` | SQLite adapter — schema DDL, typed CRUD, connection management | +| `parser.ts` | TS/TSX/JS/JSX extraction via `oxc-parser` — symbols (with JSDoc + generics + return types), type members, imports, exports, components, markers | +| `css-parser.ts` | CSS extraction via `lightningcss` — custom properties, classes, keyframes, `@theme` blocks | +| `resolver.ts` | Import path resolution via `oxc-resolver` — respects `tsconfig` aliases, builds dependency graph | +| `constants.ts` | Shared constants — e.g. `LANG_MAP` | +| `glob-sync.ts` | Include globs — Bun `Glob` vs `tinyglobby` on Node ([packaging § Node vs Bun](./packaging.md#node-vs-bun)) | +| `markers.ts` | Shared marker extraction (`TODO`/`FIXME`/`HACK`/`NOTE`) — used by all parsers | +| `parse-worker.ts` | Worker thread entry point — reads, parses, and extracts file data in parallel | +| `adapters/` | `LanguageAdapter` types and built-in TS/CSS/text implementations | +| `parsed-types.ts` | Shared `ParsedFile` shape for workers and adapters | +| `agents-init.ts` / `agents-init-interactive.ts` | `codemap agents init` — see [agents.md](./agents.md) (granular template + IDE writes, pointer upsert, **`--interactive`**, `.gitignore`) | +| `benchmark.ts` (+ `benchmark-default-scenarios.ts`, `benchmark-config.ts`, `benchmark-common.ts`) | SQL vs traditional timing; optional **`CODEMAP_BENCHMARK_CONFIG`** JSON — [benchmark.md § Custom scenarios](./benchmark.md#custom-scenarios-codemap_benchmark_config) | +| `config.ts` | `/config.{ts,js,json}` load path, **Zod** user schema (`codemapUserConfigSchema`), `resolveCodemapConfig` | ## CLI usage @@ -340,6 +340,25 @@ User-facing baselines saved by `codemap query --save-baseline`, replayed by `cod | git_ref | TEXT | `git rev-parse HEAD` at save time, or NULL when not a git working tree | | created_at | INTEGER | `Date.now()` at save time (epoch ms) | +### `coverage` — 
Statement coverage (user data) (`STRICT, WITHOUT ROWID`) + +Statement-level coverage ingested by `codemap ingest-coverage <path>` from Istanbul JSON or LCOV. Joinable to `symbols` for "what's untested?" queries. Same lifecycle posture as `query_baselines`: **intentionally absent from `dropAll()`** so `--full` and `SCHEMA_VERSION` rebuilds preserve user ingest. + +Natural-key PK `(file_path, name, line_start)` — deliberately **not** a FK to `symbols.id`. `symbols.id` is `INTEGER PRIMARY KEY AUTOINCREMENT`; on `--full` reindex `dropAll()` drops `symbols` and `createTables()` recreates it with fresh ids. A FK with `ON DELETE CASCADE` would wipe every coverage row on every full rebuild, and the recreated symbols wouldn't match the old ids anyway. Natural key sidesteps the entire CASCADE hazard. Trade-off: orphan rows when a file is deleted from the project — cleaned by `DELETE FROM coverage WHERE file_path NOT IN (SELECT path FROM files)` at the end of every ingest. + +Three meta keys (`coverage_last_ingested_at` / `_path` / `_format`) record freshness — single ingest at a time, so format is meta-level not per-row. + +| Column | Type | Description | +| ---------------- | ------- | --------------------------------------------------------------------------------------------------------- | +| file_path | TEXT PK | Project-relative path; matches `symbols.file_path`. Forward-slashed (Windows paths normalised on ingest) | +| name | TEXT PK | Symbol name (matches `symbols.name`). Same `(file_path, name, line_start)` is unique by construction | +| line_start | INT PK | Symbol's starting line (matches `symbols.line_start`). 
Disambiguates re-declared names | +| coverage_pct | REAL | Percentage 0.0–100.0; `NULL` when `total_statements = 0` (zero-statement scope; not the same as 0%) | +| hit_statements | INTEGER | Count of statements with `s[id] > 0` after the innermost-wins projection (D7 of plans/coverage-ingestion) | +| total_statements | INTEGER | Count of statements that projected onto this symbol | + +Bundled recipes consuming the table — `untested-and-dead`, `files-by-coverage`, `worst-covered-exports`. Each ships a frontmatter `actions` block (per PR #26) so agents see per-row follow-up hints in `--json` output. + ### Indexes All tables have covering indexes tuned for AI agent query patterns. See [Covering indexes](#covering-indexes) and [Partial indexes](#partial-indexes) for the full list. diff --git a/docs/glossary.md b/docs/glossary.md index 2878de0..bfd510e 100644 --- a/docs/glossary.md +++ b/docs/glossary.md @@ -99,6 +99,25 @@ CLI subcommand comparing on-disk SHA-256 against `files.content_hash`. Statuses: React components (PascalCase + JSX return or hook usage). PascalCase functions that neither return JSX nor call hooks stay in `symbols` only — never `components`. `hooks_used` is JSON-encoded. See `ComponentRow`. +### `coverage` (table) + +Static statement coverage ingested from Istanbul JSON or LCOV via `codemap ingest-coverage <path>`. Natural-key PK `(file_path, name, line_start)` — intentionally **not** a FK to `symbols.id` because `symbols` is re-created with fresh AUTOINCREMENT ids on every `--full` reindex; the natural-key approach lets coverage rows survive that churn (`coverage` is also intentionally absent from `dropAll()`, joining the `query_baselines` precedent). Columns: `coverage_pct REAL` (`NULL` when `total_statements = 0` — "untested" and "no testable code" are different signals), `hit_statements`, `total_statements`. 
Orphan rows (file deleted from project) are cleaned by an explicit `DELETE FROM coverage WHERE file_path NOT IN (SELECT path FROM files)` at the end of every ingest. Three meta keys (`coverage_last_ingested_at` / `_path` / `_format`) record freshness — single ingest at a time, so format is meta-level not per-row. + +### `codemap ingest-coverage` / Istanbul JSON / LCOV / static coverage ingestion + +`codemap ingest-coverage <path> [--json]` reads a static coverage artifact and writes statement-level rows into the `coverage` table. Two formats in v1: + +- **Istanbul JSON** (`coverage-final.json`) — emitted natively by `c8`, `nyc`, `vitest --coverage --coverage.reporter=json`, `jest --coverage --coverageReporters=json`. Parser reads `statementMap` + `s` (per-statement hit counts). +- **LCOV** (`lcov.info`) — emitted by `bun test --coverage`, `c8 --reporter=lcov`, every legacy stack. Parser tokenises `SF:<path>` / `DA:<line>,<hits>` / `end_of_record` records; ignores `TN:` / `FN:` / `BRDA:` / `LF:` / `LH:` (statement coverage only in v1). + +Format auto-detected from extension (`.json` → istanbul, `.info` → lcov, directory → probe both, error if ambiguous). No `--source` flag (per the plan's "no half-way APIs" principle — adding a flag for what the engine can detect is API noise). Each statement projects onto the **innermost** enclosing symbol via JS-side `(line_end - line_start) ASC` tie-break — required because nested symbols (class methods inside classes, closures inside functions) would otherwise inflate `total_statements`. Statements that fall outside every symbol range (top-level expressions, side-effect imports) increment `skipped.statements_no_symbol` for observability. Three bundled recipes consume the table as a first-class agent surface (no agent ever has to hand-compose the JOIN): + +- `untested-and-dead` — exported functions with no callers AND zero coverage (the killer recipe; ships with a name-collision mitigation guide in the recipe `.md`). 
+- `files-by-coverage` — files ranked ascending by statement coverage (replaces a deferred `file_coverage` rollup table; aggregates the symbol-level table via index-bounded `GROUP BY`). +- `worst-covered-exports` — top-20 worst-covered exported functions. + +Engine: `application/coverage-engine.ts` — pure `upsertCoverageRows({db, projectRoot, rows, format, sourcePath})` core consumed by both `ingestIstanbul` and `ingestLcov`. + ### `content_hash` Column on the `files` table. Lowercase SHA-256 hex of file bytes computed by `src/hash.ts`. Drives incremental staleness detection (`getChangedFiles`) and powers the `files-hashes` recipe + `codemap validate` CLI. diff --git a/docs/plans/coverage-ingestion.md b/docs/plans/coverage-ingestion.md deleted file mode 100644 index 647f209..0000000 --- a/docs/plans/coverage-ingestion.md +++ /dev/null @@ -1,169 +0,0 @@ -# Static coverage ingestion (Istanbul JSON + LCOV → `coverage` table) - -> **Status:** in design (no code) · **Backlog:** [research/fallow.md § C.11](../research/fallow.md#tier-c--ship-eventually-months-high-payoff-large-surface). Delete this file when shipped (per [`docs/README.md` Rule 3](../README.md)). - -## Goal - -Ingest static coverage artifacts — Istanbul JSON (`coverage-final.json`) and LCOV (`lcov.info`) — into the codemap index so structural queries can compose coverage filters in pure SQL, without bolting Codemap to a runtime tracer or paid coverage stack. Both formats land in v1 so every coverage tool (vitest / jest / c8 / nyc / `bun test --coverage`) is a first-class consumer on day one. - -The killer recipe this unlocks: - -```sql --- "What's structurally dead AND untested?" — single query, two evidence axes. --- `calls` is name-keyed (no symbol-id FK, see `db.ts` `CallRow`), so the --- "no callers" predicate is name-only / lossy across cross-file collisions --- (acceptable v1 limitation — see D11). 
The `coverage` join uses the natural --- key (file_path, name, line_start) so it survives `--full` reindex (D6). -SELECT s.name, s.file_path, c.coverage_pct -FROM symbols s -LEFT JOIN coverage c - ON c.file_path = s.file_path - AND c.name = s.name - AND c.line_start = s.line_start -WHERE s.is_exported = 1 - AND NOT EXISTS (SELECT 1 FROM calls WHERE callee_name = s.name) - AND COALESCE(c.coverage_pct, 0) = 0 -ORDER BY s.file_path, s.line_start; -``` - -Today an agent has to run two tools (`codemap` + a coverage reader) and join in JS. After this lands it's one `query` + one `JOIN`. - -## Why - -- **Codemap is structural-only today.** Every "is this dead?" query has a structural false-positive rate (the §0 fallow audit found an 8-file widget pack with non-zero structural fan-in but zero runtime usage). Coverage is the **complementary evidence axis** — `structural fan-in = 0` AND `runtime coverage = 0` is the high-confidence "dead" predicate. -- **Static ingestion is free.** Istanbul JSON (`vitest`, `jest`, `c8 --reporter=json`, `nyc`) and LCOV (`bun test --coverage`, `c8 --reporter=lcov`, every legacy stack) cover the entire test-runner ecosystem. We're not building a coverage tracer — we're reading the artifacts those tools already produce. -- **Fallow's runtime intelligence is paid.** Static coverage ingestion gets ~80% of the agent value (the "is X dead?" predicate above) without entering Fallow's V8/production-beacon territory (explicit non-goal per [research/fallow.md § D.16](../research/fallow.md#defer--skip)). -- **Composes with `codemap impact`.** `impact --direction up --depth 0` returns callers; joining `coverage` on the result tells the agent "this symbol has 12 callers but only 2 of them are hit by tests" — refactor risk in one query. - -## Agent journey (the "first-class" axis) - -Every common agent question this feature unlocks must be a one-verb call, not "compose this JOIN yourself" — that's what "fully capable, no half-way APIs" means in practice. 
The v1 surface: - -| Agent asks | v1 verb | Backed by | -| ----------------------------------------- | -------------------------------------------------------------- | ------------------------ | -| "Is `legacyClient` tested?" | `query "SELECT * FROM coverage WHERE name = '…'"` | `coverage` tbl | -| "What's structurally dead AND untested?" | `query --recipe untested-and-dead` | D13 | -| "Rank files by test coverage" | `query --recipe files-by-coverage` | D13 (covers D2 deferral) | -| "Worst-covered exported symbols (top 20)" | `query --recipe worst-covered-exports` | D13 | -| "Coverage of these specific symbols" | `query "SELECT … FROM coverage WHERE name IN (…)"` | `coverage` tbl | -| "Did coverage change since base?" | `--save-baseline` + `--baseline` (existing primitive composes) | B.6 | -| "Refactor risk: callers vs coverage" | `impact ` JSON piped into a `coverage` LEFT JOIN | impact + coverage | - -Recipes 2 and 3 are added because deferring the `file_coverage` rollup table (D2) would otherwise force the agent to compose `GROUP BY` queries by hand — half-baked surface. Bundling the recipes keeps the schema lean (D2) AND the agent surface complete (D13). - -## Sketched layout - -### Schema (D1, D6 natural-key fix) - -```sql --- Single table; symbols-side denormalisation rejected (D1). Natural-key PK --- (file_path, name, line_start) — NOT a FK to symbols.id — so rows survive --- the symbols-table drop-and-recreate cycle on every `--full` reindex (D6). --- file_coverage rollup deferred to v1.x (D2). -CREATE TABLE coverage ( - file_path TEXT NOT NULL, - name TEXT NOT NULL, - line_start INTEGER NOT NULL, - coverage_pct REAL, -- NULL when total_statements = 0 (D5 edge) - hit_statements INTEGER NOT NULL, - total_statements INTEGER NOT NULL, - PRIMARY KEY (file_path, name, line_start) -) STRICT, WITHOUT ROWID; - --- Index that mirrors the typical join shape `symbols.{file_path,name,line_start}`. 
-CREATE INDEX idx_coverage_file_name ON coverage(file_path, name); - --- Meta row: timestamp + source path of the last successful ingest. Lets agents --- check freshness without a separate verb. NULL when no coverage ever ingested. --- `source` lives here (single ingest at a time), not as a per-row column. -INSERT INTO meta (key, value) VALUES ('coverage_last_ingested_at', ''); -INSERT INTO meta (key, value) VALUES ('coverage_last_ingested_path', ''); -INSERT INTO meta (key, value) VALUES ('coverage_last_ingested_format', 'istanbul'); -- or 'lcov' -``` - -**Why no FK / CASCADE:** `symbols.id` is `INTEGER PRIMARY KEY AUTOINCREMENT`; on `--full` reindex `dropAll()` drops `symbols` and `createTables()` recreates it with fresh IDs. A FK with CASCADE would wipe every coverage row on every full rebuild even if `coverage` itself were excluded from `dropAll()` — see D6 for the full unwind. Natural key sidesteps it. Orphan rows (file deleted from project) get cleaned by an explicit one-statement sweep at the end of every ingest: - -```sql -DELETE FROM coverage WHERE file_path NOT IN (SELECT path FROM files); -``` - -### CLI - -```text -codemap ingest-coverage <path> [--json] -``` - -- `<path>` — required. One of: `coverage-final.json` (Istanbul), `lcov.info` (LCOV), or a directory — engine probes the directory for either filename, errors if both or neither are present (no precedence guessing — explicit is better than implicit). -- `--json` — emit `{ingested: {symbols: N, files: M}, skipped: {unmatched_files: K, statements_no_symbol: S}, pruned_orphans: O, format: "istanbul"|"lcov"}` envelope on stdout. - -**No `--source` flag** — format is auto-detected from extension (`.json` → istanbul, `.info` → lcov). Adding a flag for a format the engine can detect is API noise; if a user ever has a misnamed file, they can rename it (one-liner) cheaper than codemap can grow a flag for it. 
- -**No `--prune` flag** — orphan cleanup is unconditional after every ingest (one DELETE; cheap; the only time coverage rows for a deleted file would be valid evidence is "this file used to exist" which is git's job, not codemap's). - -Decoupled from `codemap` (the index command) on purpose (D4) — coverage runs once per test invocation, not once per file edit. - -## Decisions - -| # | Decision | -| --- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| D1 | **Separate `coverage` table, not columns on `symbols`.** Resolves the open question in [research/fallow.md § 6](../research/fallow.md#6-open-questions). 
Three reasons: (a) coverage shape evolves independently of structural columns (per-branch, per-function, per-line metrics layered later) — denormalising churns `SCHEMA_VERSION` every time; (b) `symbols` rows exist for every TS / TSX file; coverage rows only for tested files — joining via `LEFT JOIN coverage` keeps NULL semantics explicit; (c) lifecycle independence — `symbols` re-creates on every `--full` reindex with fresh auto-increment IDs; the natural-key PK in `coverage` (D6) survives that churn without re-ingest. | -| D2 | **One table in v1; defer `file_coverage` rollup to v1.x — but ship the agent-facing answer as a bundled recipe.** `SELECT file_path, SUM(hit_statements), SUM(total_statements) FROM coverage GROUP BY file_path` answers "files ranked by coverage" with a single index-scan on `idx_coverage_file_name`. A separate rollup table doubles the source-of-truth surface and adds an UPSERT path; promote only after a real consumer query proves the GROUP BY is the bottleneck. The agent surface stays first-class via the bundled `files-by-coverage` recipe (D13) — no agent ever has to compose the GROUP BY by hand. Same "don't pre-emptively widen" discipline as D5; same "first-class via recipe" pattern as D13. | -| D3 | **Both Istanbul JSON and LCOV in v1; c8 raw V8 traces never.** Earlier draft deferred LCOV to v1.x; reversed because shipping a coverage feature that excludes `bun test --coverage` users (which ships LCOV / text reporters only — verified via `bun test --help`) is half-baked for codemap's own primary runtime, and the "no half-way APIs" principle bans it. Istanbul JSON (`coverage-final.json`) covers `c8`, `nyc`, `vitest --coverage --coverage.reporter=json`, `jest --coverage --coverageReporters=json`. LCOV (`lcov.info`) covers `bun test --coverage`, `c8 --reporter=lcov`, every legacy stack. Together: every modern coverage tool emits at least one of these — no consumer left waiting. 
Two parser front-ends share one `upsertCoverageRows()` core, so the second format costs ~1 LoC per parser plus the regex tokenizer for LCOV. Raw V8 traces (`*.cpu.profile` / `coverage-c8/*.json`) stay out of scope — Fallow's paid moat, requires a runtime tracer Codemap doesn't ship. | -| D4 | **One-shot `codemap ingest-coverage `, NOT auto-detected during `codemap` runs.** Reasons: (a) `codemap` is sub-100ms cold-start; auto-probing for `coverage/coverage-final.json` adds a `stat` and grows the surface for "where did codemap look for coverage?"; (b) coverage cadence (once per test run) is decoupled from index cadence (every file edit) — coupling them means stale coverage on every save; (c) explicit verb makes the agent's mental model trivial: "tests ran → `codemap ingest-coverage` → `codemap query`". `codemap watch` does NOT auto-ingest coverage on `coverage-final.json` change (separate concern; revisit if demand materialises). | -| D5 | **Statement coverage only in v1.** Istanbul tracks statement / branch / function / line coverage separately. `coverage_pct` is **statement** coverage (the most stable signal across runners — function coverage misses anonymous closures, branch coverage explodes for switch / ternary). v1.x can add `branch_coverage_pct` / `function_coverage_pct` columns once a real consumer asks. Don't pre-emptively widen. | -| D6 | **Natural-key PK `(file_path, name, line_start)` — no FK to `symbols.id`.** Earlier draft used `symbol_id REFERENCES symbols(id) ON DELETE CASCADE`; CodeRabbit-style self-audit caught that this would wipe every coverage row on every `--full` reindex (because `dropAll()` drops `symbols` → CASCADE fires → recreated `symbols` get fresh auto-increment IDs that don't match the old ones anyway). Natural key sidesteps the entire CASCADE hazard. Trade-off: orphan rows when a file is deleted from the project; cleaned by a single `DELETE FROM coverage WHERE file_path NOT IN (SELECT path FROM files)` at the end of every ingest. 
`coverage` is excluded from `dropAll()` (joins the `query_baselines` precedent) so the natural-key rows persist across `--full`. Re-ingest is the user's explicit refresh verb only when test results actually changed. | -| D7 | **Symbol mapping: innermost-wins line-range projection.** Istanbul's `statementMap` is line-keyed; we project per-statement hits onto the **innermost** enclosing symbol — `line_start ≤ stmt_line ≤ line_end` ordered by `(line_end - line_start) ASC LIMIT 1`. Innermost-wins is required because symbols nest (class methods inside classes, closures inside functions); attributing one statement to all enclosing scopes would inflate `total_statements` 2-3× on real codebases. Symbols whose range covers 0 statements (interfaces, type aliases) get no coverage row → `NULL` in `LEFT JOIN coverage`. (No prior table uses this projection — `markers` are line-pinned, `calls` are name-keyed; the projection is novel for this plan.) Statements that fall outside every symbol range (top-level expression statements, side-effect imports) increment `skipped.statements_no_symbol` for observability — no row written. | -| D8 | **Path normalisation: project-relative, forward-slashed.** Istanbul writes absolute paths; we strip `/` and replace `\\` with `/` to match `files.path`. Files outside the project root land in `skipped.unmatched_files`. Same projection `toProjectRelative()` (in `validate-engine`) already does — reuse the helper instead of rewriting it. | -| D9 | **MCP / HTTP exposure: column in `query` results, NOT a separate `coverage` tool.** The killer recipe (top of this doc) is one SQL query — `query` / `query_recipe` already returns `coverage_pct` as a column when the SELECT asks for it. A standalone `coverage` MCP tool would duplicate the surface; revisit only if a consumer ships a wrapper script that proves the SQL ergonomic gap is real. 
| -| D10 | **`codemap audit` integration deferred.** Adding `--delta coverage` to `audit` is the natural next step (flag "files where `coverage_pct` dropped >5% vs `--base`") but layered on top of D1–D9. Track as v1.x backlog; ship the ingester + raw schema first. | -| D11 | **Ingester lives in `application/coverage-engine.ts`.** Same engine-vs-CLI split as `impact` / `audit`: pure ingester (`ingestIstanbul` / `ingestLcov`, both calling `upsertCoverageRows`) consumed by `cli/cmd-ingest-coverage.ts`. No MCP / HTTP transport in v1 (D9). Engine is unit-testable against fixture artifacts without spinning up the CLI. Edge cases the engine guards: `total_statements = 0 → coverage_pct = NULL` (not 0; "untested" and "no testable code" are different); name-collision tolerance: same `(file_path, name, line_start)` is unique by construction (two functions can't start on the same line), so the natural-key PK never collides — but cross-file name collisions (`init` in `a.ts` and `b.ts`) mean the killer recipe's `callee_name = s.name` predicate is name-only / lossy in v1. **Mitigation shipped in v1**: the bundled `untested-and-dead.md` documents the limitation and shows three concrete narrowing patterns the agent can apply (scope by `file_path LIKE 'src/api/%'`, exclude framework re-exports via `is_default_export = 0`, restrict to `is_exported = 1` already in the recipe). v1.x can add a `caller_file_path` column to `calls` for fully-precise resolution if a consumer's narrowing pattern proves insufficient. 
| -| D13 | **Bundled recipe shelf — every common agent question gets a `--recipe` verb, not "write your own SQL".** Three v1 recipes in `templates/recipes/` cover the agent journey end-to-end: (1) `untested-and-dead.{sql,md}` — exported symbols with no callers and zero coverage (the killer recipe); (2) `files-by-coverage.{sql,md}` — files ranked ascending by coverage_pct, surfacing the GROUP BY view that makes D2's deferral non-blocking; (3) `worst-covered-exports.{sql,md}` — exported symbols sorted by ascending coverage_pct, with a configurable `LIMIT`-via-frontmatter for the "show me the top 20" agent ask. Each recipe `.md` carries an `actions` block (per [PR #26](https://github.com/stainless-code/codemap/pull/26)) so agents see the suggested follow-up per row (e.g. "open-deprecation-issue", "add-test-suite"). All three appear in `--recipes-json` automatically — agents discover them at session start without reading docs. | -| D12 | **Schema bump = minor changeset.** Adds one table (`coverage`) + three meta keys; doesn't break any existing query. Per [`.agents/lessons.md`](../../.agents/lessons.md) "changesets bump policy" (verbatim: _reserve minor for schema-breaking changes that force a `.codemap.db` rebuild — matches 0.2.0 precedent: new tables/columns/`SCHEMA_VERSION` bump_), new tables + `SCHEMA_VERSION` bump = minor. The bump triggers `dropAll()` on next `codemap` run; the `coverage` table is absent on existing installs until first ingest (no migration needed — empty is the correct initial state). Subsequent `SCHEMA_VERSION` bumps preserve coverage data via the `dropAll()` exclusion (D6). | - -## Tracer-bullet plan - -Per [`tracer-bullets.mdc`](../../.cursor/rules/tracer-bullets.mdc) — vertical slices, each shippable on its own. 
- -| # | Tracer | Acceptance | -| --- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------- | -| 1 | **Schema + bump** in `src/db.ts`. Add the `coverage` table DDL (natural-key PK per D6 — no FK to `symbols.id`); bump `SCHEMA_VERSION`; add `coverage` to the `query_baselines`-style "preserve across `--full`" exclusion in `dropAll()`. Add `idx_coverage_file_name`. New `meta` keys (`coverage_last_ingested_at` / `_path` / `_format`) need no DDL — `meta` is `(key, value)` already. Unit test (`src/db.test.ts`) inserts + reads back one row; verifies the table survives a `dropAll()` + `createSchema()` round-trip; verifies the orphan-cleanup DELETE removes rows whose `file_path` is no longer in `files`. | Table exists; survives `--full`; orphan sweep works. 
| -| 2a | **Shared engine core** in `src/application/coverage-engine.ts` (new). `upsertCoverageRows({db, rows, projectRoot}) → {ingested, skipped, pruned_orphans}` — format-agnostic: takes a normalised `CoverageRow[]` (`{file_path, line, hit_count}`-shape), maps each row to the **innermost** enclosing symbol via the SQL `(line_end - line_start) ASC LIMIT 1` projection (D7), upserts `coverage` keyed on `(file_path, name, line_start)`, writes the meta keys, runs the orphan-cleanup DELETE. Computes `coverage_pct = total_statements > 0 ? hit / total * 100 : NULL`. Reuses `toProjectRelative` from `validate-engine` (D8). Plus `ingestIstanbul({db, payload, projectRoot})` — parses the Istanbul shape (`{ [absPath]: {statementMap, s, fnMap, f, branchMap, b, path} }`), normalises to `CoverageRow[]`, calls `upsertCoverageRows`. Unit tests cover both the shared core (fresh ingest, re-ingest UPSERT idempotence, unmatched file, statement-outside-symbol, nested-closure-innermost-wins, `total_statements = 0 → NULL`, orphan cleanup) and the Istanbul parser (statement-map shape, absolute-path normalisation, Windows-path handling). | Both pieces pure; deterministic upsert; Istanbul parser tested in isolation against fixture JSON. | -| 2b | **LCOV parser** in `src/application/coverage-engine.ts` — `ingestLcov({db, payload, projectRoot})`. Pure regex tokenizer over the LCOV record format (`SF:` / `DA:,` / `end_of_record`); normalises to the same `CoverageRow[]`, calls `upsertCoverageRows` — zero new write-side code. Unit tests cover: well-formed LCOV with multiple `SF` records, `DA` lines with hit / miss / both, malformed records (missing `end_of_record` → error), unmatched file. Real `bun test --coverage` output captured into a fixture for round-trip testing. | Pure parser; passes round-trip on a real `bun test --coverage` artifact. | -| 3 | **CLI verb** `cli/cmd-ingest-coverage.ts` — parses `` + `--json`. 
Auto-detects format: file ending `.json` → istanbul, `.info` → lcov, directory → probes for `coverage-final.json` / `lcov.info` (errors if both or neither). Reads JSON via the established runtime split (`Bun.file(path).json()` on Bun for the native-parser perf win; `readFile + JSON.parse` on Node — see [`packaging.md § Node vs Bun`](../packaging.md#node-vs-bun), mirrors `config.ts`); reads LCOV via `Bun.file(path).text()` / `readFile`. Dispatches to `ingestIstanbul` or `ingestLcov`. `main.ts` dispatcher gains the verb between existing entries. Help text in `bootstrap.ts` lists the new command. Unit tests cover: file-not-found, malformed JSON / LCOV, ambiguous directory (both files), empty directory, `--json` envelope shape. | `bun src/index.ts ingest-coverage ` and `` both write rows; `--help` lists the verb. | -| 4 | **Fixture coverage data + bundled recipe shelf (D13)** — ship two fixtures: `fixtures/minimal/coverage/coverage-final.json` (Istanbul) and `fixtures/minimal/coverage/lcov.info` (LCOV) covering the same partial coverage shape (e.g. `usePermissions` 100%, `legacyClient` 0%, `now` 50%). Three bundled recipes in `templates/recipes/` per D13: `untested-and-dead.{sql,md}` (killer recipe, with the three name-collision narrowing patterns from D11 in the `.md`), `files-by-coverage.{sql,md}` (replaces the deferred `file_coverage` table per D2), `worst-covered-exports.{sql,md}` (top-N exported symbols by ascending coverage_pct). Each `.md` includes a frontmatter `actions` block for per-row agent hints. Five golden recipes under `fixtures/golden/minimal/`: `coverage-istanbul.json`, `coverage-lcov.json` (cross-format equivalence), `untested-and-dead.json`, `files-by-coverage.json`, `worst-covered-exports.json`. Adds fixtures + all three recipes to `fixtures/minimal/README.md` "What's exercised" table. The pre-enriched `legacyClient` / `now` / `epochMs` deprecated symbols (PR #55) directly test the join axis. 
| Both ingesters produce identical golden rows; all three recipes queryable via `--recipe`; `--recipes-json` lists all three. | -| 5 | **Doc + agent rule + skill + changeset + plan deletion** — `docs/architecture.md` § Persistence wiring (new table, ingester engine, CLI verb, both formats, three bundled recipes), `docs/glossary.md` (`coverage_pct`, "Istanbul JSON", "LCOV", "static coverage ingestion", `untested-and-dead`, `files-by-coverage`, `worst-covered-exports`), `.agents/rules/codemap.md` + `templates/agents/rules/codemap.md` (Rule 10 lockstep — three new trigger-pattern rows: "What's untested?" → `coverage` table, "What's untested AND structurally dead?" → `--recipe untested-and-dead`, "Rank files by coverage" → `--recipe files-by-coverage`; also a `coverage` row in the table-shape table), skill `SKILL.md` `coverage` table row + recipes section update, README.md "What's exercised" + Use section (`bun test --coverage` LCOV worked example + `vitest --coverage` Istanbul worked example). Minor changeset (per D12). Plan deleted per `docs/README.md` Rule 3. | All docs consistent; agents see the `coverage` table + all three bundled recipes + new trigger patterns on next `codemap agents init`. | - -## Performance considerations - -- **Ingest cost** — one parse pass (Istanbul JSON or LCOV) + linear scan of statements + one symbol-projection SQL per statement. Per-statement projection is the hot path; backed by `idx_symbols_file` it's an index-bounded lookup. On Bun, JSON parsing uses `Bun.file(path).json()` (native parser, materially faster than V8 `JSON.parse` on multi-MB Istanbul payloads); on Node it's `readFile + JSON.parse`. LCOV is line-by-line text on both runtimes. No published benchmark yet — measurement target during tracer 2. -- **Read cost** — `coverage` is `WITHOUT ROWID` on `(file_path, name, line_start)`. 
The killer recipe's `LEFT JOIN coverage ON file_path AND name AND line_start` is a 3-column composite-PK lookup per row; `idx_coverage_file_name` covers the prefix. -- **Storage** — three INTEGER + one REAL + two short TEXT per row; small. Real disk impact dominated by symbol count, not coverage shape. -- **No background worker.** Ingest is single-pass; reindex is unaffected. - -## Alternatives considered - -| Candidate | Why not | -| -------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| **Add `coverage_pct REAL` + `is_runtime_hot INTEGER` to `symbols` directly** | Per D1: shape-coupling, NULL ambiguity, schema-bump amplification. The original sketch ([research/fallow.md § C.11](../research/fallow.md#tier-c--ship-eventually-months-high-payoff-large-surface)) listed this — D1 supersedes. | -| **Auto-detect `coverage/coverage-final.json` during `codemap` runs** | Per D4: cadence mismatch + auto-probe surface. The auto-detect path is also harder to reason about ("why does this index include coverage on machine A but not B?"); explicit verb is unambiguous. | -| **Defer LCOV to v1.x (Istanbul-only v1)** | Per D3: ships a coverage feature that excludes `bun test --coverage` users — half-baked for codemap's own primary runtime, banned by the "no half-way APIs" principle. Two parser front-ends + one shared upsert core costs ~one tracer's worth of code; not worth deferring. | -| **Keep `--source istanbul\|lcov` flag** | API noise: format is auto-detectable from extension (`.json` / `.info`); a flag for "tell codemap what it can already see" is over-engineered. 
Misnamed files can be renamed (one-liner) cheaper than codemap can grow a flag. | -| **Embed runtime coverage tracer (V8 / Istanbul beacons)** | Out of scope per [research/fallow.md § D.16](../research/fallow.md#defer--skip) and [roadmap.md § Non-goals](../roadmap.md#non-goals-v1). This plan reads artifacts; it does not produce them. | -| **`coverage` rows keyed on `symbol_id` with `ON DELETE CASCADE`** | Original draft. Rejected per D6: `dropAll()` drops `symbols` on every `--full`, CASCADE wipes coverage, recreated `symbols` get fresh auto-increment IDs anyway → coverage permanently lost without re-ingest. The natural-key approach (D6) sidesteps the entire FK/CASCADE hazard at the cost of one explicit orphan-cleanup DELETE per ingest. | -| **Separate `file_coverage` rollup table in v1** | Per D2: aggregateable in one indexed `GROUP BY` on the symbol-level table; doubling the source-of-truth surface without a benchmark is premature. Promote in v1.x once a real query proves the cost. | -| **Separate MCP `coverage` tool returning `{symbol, pct, hit, total}` envelopes** | Per D9: the column-in-`query`-results path is composable with every existing recipe + ad-hoc SQL; a standalone tool would force a parallel surface for one column. Revisit if SQL composition proves too verbose for agents. | -| **Inline coverage into `codemap audit` v1 (`--delta coverage`)** | Per D10: the plan stays small. Audit-side delta is a clean follow-up once the raw schema lands. | -| **Persist coverage in a sibling JSON file (`/coverage.json`)** | Forces every consumer to re-implement the join; loses SQL composability. The whole point of Codemap is "it's a SQL index" — keep coverage in the same DB. | - -## Out of scope - -- **Branch / function / line coverage breakdowns** — D5; v1.x once a consumer asks with a concrete query. -- **`file_coverage` rollup table** — D2; v1.x with a benchmark. 
-- **Cross-file callee disambiguation** — D11; the killer recipe's `callee_name = s.name` is name-only / lossy in v1. v1.x can add a `caller_file_path` column to `calls` if a real consumer needs the precision. -- **Coverage diff against baseline** — `--save-baseline` / `--baseline` already covers arbitrary query result snapshots; coverage queries inherit it for free. A first-class `coverage_diff` would duplicate. -- **Coverage trend over time** — adjacent to telemetry; not in v1. Consumers can `--save-baseline coverage-snapshot-` periodically. -- **CI verdict / threshold logic** — same condition as `codemap audit verdict`: defer until a consumer ships a `jq` script that proves the threshold shape. -- **`--delta coverage` in `codemap audit`** — D10; layered on top of the v1 schema. -- **Auto-running tests** — Codemap reads coverage artifacts; it doesn't invoke any test runner. Test orchestration is the user's CI. -- **Source-map-aware coverage** — Istanbul's `statementMap` is post-transform; coverage rows reflect the compiled file's structure. Source-map walking to original `.ts` lines is deferred (most TS runners — `vitest`, `jest` — instrument the pre-compile source already). -- **`codemap watch` auto-reingest** — D4 explicitly excludes; revisit only if a consumer writes a watcher that re-runs `bun test --coverage` automatically. diff --git a/docs/research/fallow.md b/docs/research/fallow.md index 7f37b82..879f591 100644 --- a/docs/research/fallow.md +++ b/docs/research/fallow.md @@ -23,7 +23,7 @@ Adoption-candidate ship status. The tier tables in § 1 are preserved as the ori | B | B.8 | `--format sarif` + `--format annotations` | ✅ Shipped | PR [#43](https://github.com/stainless-code/codemap/pull/43). 
`codemap query --format sarif\|annotations` (also on MCP `query` / `query_recipe` tools as `format: "sarif"\|"annotations"`); `rule.id = codemap.<recipe-id>` (`codemap.adhoc` for ad-hoc SQL); auto-detects `file_path` / `path` / `to_path` / `from_path`; aggregate recipes (`index-summary`, `markers-by-kind`) emit `results: []` + stderr warning. Per-recipe `sarifLevel` / `sarifMessage` / `sarifRuleId` overrides via frontmatter deferred to v1.x. | | C | C.9 | Framework plugin layer | ❌ Open | Big surface; worth a `plans/<name>.md` before any code. | | C | C.10 | LSP server + Code Lens | ❌ Open | Independent but tangles with persistent-daemon non-goal. | -| C | C.11 | Static coverage ingestion | ❌ Open | Schema bump; one-shot ingester. | +| C | C.11 | Static coverage ingestion | ✅ Shipped | Plan PR [#56](https://github.com/stainless-code/codemap/pull/56); implementation this PR. `codemap ingest-coverage <path>` reads Istanbul JSON or LCOV (both formats ship in v1, so `bun test --coverage` users are not left waiting). Natural-key `coverage` table joinable to `symbols`; three bundled recipes (`untested-and-dead`, `files-by-coverage`, `worst-covered-exports`) — every common agent question is a `--recipe` verb, no compose-your-own-JOIN tax. | | D | D.12-D.16 | Suppressions / per-rule severity / `fix` / suffix-array dupes / runtime intelligence | ⏸️ Skip | See § 1 Defer / skip table for the per-row reasoning. | **Adjacent — also shipped post-refresh:** @@ -43,7 +43,7 @@ Adoption-candidate ship status. The tier tables in § 1 are preserved as the ori - "Should `actions` (A.1) live in recipe definitions or be derived?" — **(a) recipe-defined**, settled in PR [#26](https://github.com/stainless-code/codemap/pull/26). - "`codemap audit` (B.5) verdict threshold defaults?" — **defer verdict to v1.x; ship raw deltas + `jq` idiom in v1**, settled in PR [#33](https://github.com/stainless-code/codemap/pull/33). -- "Coverage ingestion (C.11) — column on `symbols` or separate `coverage` table?"
— still open, blocked on C.11 prioritisation. +- "Coverage ingestion (C.11) — column on `symbols` or separate `coverage` table?" — **separate table**, settled in plan PR [#56](https://github.com/stainless-code/codemap/pull/56) (D1 + D6 — natural-key PK survives the `symbols` drop-recreate cycle on every `--full` reindex; `LEFT JOIN coverage` keeps NULL semantics explicit). - "How invasive should the framework plugin layer (C.9) be?" — still open. --- diff --git a/docs/roadmap.md b/docs/roadmap.md index 9ad5a8e..d6ebfd1 100644 --- a/docs/roadmap.md +++ b/docs/roadmap.md @@ -35,7 +35,6 @@ Codemap stays a structural-index primitive that other tools can consume. Out of ## Backlog -- [ ] **Static coverage ingestion** — `codemap ingest-coverage ` reads Istanbul `coverage-final.json` into a `coverage` table, joinable to `symbols` for "what's structurally dead AND untested?" queries. Plan: [plans/coverage-ingestion.md](./plans/coverage-ingestion.md). Adapted from [research/fallow.md § C.11](./research/fallow.md#tier-c--ship-eventually-months-high-payoff-large-surface). - [ ] **`codemap audit` verdict + thresholds** (v1.x) — `verdict: "pass" | "warn" | "fail"` driven by `codemap.config.audit.deltas[].{added_max, action}`. Triggers: two consumers ship `jq`-based threshold scripts with similar shapes, OR one consumer asks with a concrete config sketch. Until then, raw deltas + consumer-side `jq` is the CI exit-code idiom. - [ ] **Monorepo / workspace awareness** — discover workspaces from `pnpm-workspace.yaml` / `package.json` and index per-workspace dependency graphs - [ ] **Cross-agent handoff artifact** — _speculative_; layered prefix/delta JSON written on session-stop, read on session-start. 
Complementary to indexing rather than core to it; revisit if user demand emerges diff --git a/templates/agents/rules/codemap.md b/templates/agents/rules/codemap.md index 9c83ec2..ca0aa60 100644 --- a/templates/agents/rules/codemap.md +++ b/templates/agents/rules/codemap.md @@ -6,7 +6,7 @@ alwaysApply: true > **STOP.** Before you call Grep, Glob, SemanticSearch, or Read to answer a **structural** question about this repository — query the Codemap SQLite index first. This is not optional when the question matches a trigger pattern below. -A local database (default **`.codemap/index.db`**) indexes structure: symbols, imports, exports, components, dependencies, markers, CSS variables, CSS classes, CSS keyframes. The `.codemap/` directory holds every codemap-managed file (`index.db` + WAL/SHM, `audit-cache/`, project `recipes/`, `config.{ts,js,json}`, self-managed `.gitignore`); override the dir with `--state-dir <dir>` or `CODEMAP_STATE_DIR`. The `.codemap/.gitignore` is **codemap-managed and reconciled on every boot** — codemap version bumps auto-apply on next run, no manual cleanup needed. +A local database (default **`.codemap/index.db`**) indexes structure: symbols, imports, exports, components, dependencies, markers, CSS variables, CSS classes, CSS keyframes, and (after `codemap ingest-coverage <path>`) static coverage from Istanbul JSON or LCOV. The `.codemap/` directory holds every codemap-managed file (`index.db` + WAL/SHM, `audit-cache/`, project `recipes/`, `config.{ts,js,json}`, self-managed `.gitignore`); override the dir with `--state-dir <dir>` or `CODEMAP_STATE_DIR`. The `.codemap/.gitignore` is **codemap-managed and reconciled on every boot** — codemap version bumps auto-apply on next run, no manual cleanup needed. **Generic defaults:** This rule is **project-agnostic**. After **`codemap agents init`** (or copying these files into **`.agents/`**), **edit your copy** to add app-specific triggers and SQL — upstream text is only a baseline.
@@ -37,6 +37,7 @@ Install **[@stainless-code/codemap](https://www.npmjs.com/package/@stainless-cod | Targeted read (metadata) | `codemap show [--kind ] [--in ] [--json]` — file:line + signature | | Targeted read (source text) | `codemap snippet [--kind ] [--in ] [--json]` — same lookup + source from disk + stale flag | | Impact (blast-radius walker) | `codemap impact [--direction up\|down\|both] [--depth N] [--via ] [--limit N] [--summary] [--json]` — replaces hand-composed `WITH RECURSIVE` queries | +| Coverage ingest | `codemap ingest-coverage [--json]` — Istanbul (`coverage-final.json`) or LCOV (`lcov.info`); format auto-detected. Joinable to `symbols` for "untested AND dead" queries. | | SARIF / GH annotations | `codemap query --recipe deprecated-symbols --format sarif` · `… --format annotations` | | HTTP server (for non-MCP) | `codemap serve [--host 127.0.0.1] [--port 7878] [--token ] [--watch] [--debounce ]` — same tool taxonomy over POST /tool/{name}. | | Watch mode (live reindex) | `codemap watch [--debounce 250] [--quiet]` — long-running; debounced reindex on file changes. Combine with `codemap mcp --watch` / `codemap serve --watch` (or `CODEMAP_WATCH=1`) so every tool reads a live index without per-request prelude. | @@ -118,6 +119,10 @@ If the question looks like any of these → use the index: | "Is symbol X deprecated?" / "What does X do?" | `symbols` (`doc_comment`) | | "What's `@internal` / `@beta` / `@alpha` / `@private`?" | `symbols.visibility` (parsed JSDoc tag — not regex) | | "Who calls X?" / "What does X call?" | `calls` | +| "Is symbol X tested?" / "What's the coverage of file Y?" | `coverage` (after `ingest-coverage`) | +| "What's structurally dead AND untested?" | `--recipe untested-and-dead` | +| "Rank files by test coverage" | `--recipe files-by-coverage` | +| "Worst-covered exported functions" | `--recipe worst-covered-exports` | ## When Grep / Read IS appropriate @@ -165,6 +170,8 @@ codemap query --json "" | Who calls X? 
| `SELECT DISTINCT caller_name, file_path FROM calls WHERE callee_name = '...'` | | What does X call? | `SELECT DISTINCT callee_name FROM calls WHERE caller_name = '...'` | | Call hotspots | `SELECT callee_name, COUNT(*) as fan_in FROM calls GROUP BY callee_name ORDER BY fan_in DESC LIMIT 10` | +| Symbol coverage | `SELECT name, hit_statements, total_statements, coverage_pct FROM coverage WHERE file_path = '...'` | +| Untested + dead exports | `codemap query --json --recipe untested-and-dead` | **Use `DISTINCT`** on dependency and import queries — a file importing multiple specifiers from the same module produces duplicate rows. diff --git a/templates/agents/skills/codemap/SKILL.md b/templates/agents/skills/codemap/SKILL.md index 265c7e9..98692dc 100644 --- a/templates/agents/skills/codemap/SKILL.md +++ b/templates/agents/skills/codemap/SKILL.md @@ -267,6 +267,21 @@ User-facing baselines saved by `codemap query --save-baseline`, replayed by `cod | git_ref | TEXT | `git rev-parse HEAD` at save time, or NULL when not a git working tree | | created_at | INTEGER | `Date.now()` at save time (epoch ms) | +### `coverage` — Statement coverage (user data, ingested via `codemap ingest-coverage`) + +Static coverage from Istanbul JSON or LCOV. Joinable to `symbols` for "what's untested?" queries. **Survives `--full` and SCHEMA bumps** — intentionally absent from `dropAll()`. Empty until first ingest. + +| Column | Type | Description | +| ---------------- | ------- | -------------------------------------------------------------------------------------------------------- | +| file_path | TEXT PK | Project-relative path; matches `symbols.file_path`. Forward-slashed (Windows paths normalised on ingest) | +| name | TEXT PK | Symbol name (matches `symbols.name`). Same `(file_path, name, line_start)` is unique by construction | +| line_start | INT PK | Symbol's starting line (matches `symbols.line_start`). 
Disambiguates re-declared names | +| coverage_pct | REAL | Percentage 0.0–100.0; `NULL` when `total_statements = 0` (zero-statement scope; not the same as 0%) | +| hit_statements | INTEGER | Count of statements with non-zero hit count after innermost-wins projection | +| total_statements | INTEGER | Count of statements that projected onto this symbol | + +Three meta keys (`coverage_last_ingested_at` / `_path` / `_format`) record freshness — single ingest at a time, format is meta-level. + ## Query patterns ### Basic lookups From ac2d7e1df186906233b7adae2d9864f2be4b3648 Mon Sep 17 00:00:00 2001 From: Sutu Sebastian Date: Mon, 4 May 2026 11:33:07 +0300 Subject: [PATCH 07/10] fix(db): drop semicolon inside `--` line comment in coverage index DDL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CI Build (Node, better-sqlite3) failed with "RangeError: The supplied SQL string contains no statements" on `node dist/index.mjs --full`. Root cause is the .agents/lessons.md lesson on naive `;` splitting: the new coverage index comment contained `symbols.{file_path,name,line_start};`, and runSql() splits multi-statement strings on `;` for better-sqlite3 (one statement per prepare). The trailing semicolon inside the `--` comment created an empty fragment that better-sqlite3 rejects. Reworded the comment to use parentheses + a period — same intent, no semicolon. Verified locally via `bun run build && node dist/index.mjs --full` against /tmp project: full index now completes on Node. 
--- src/db.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/db.ts b/src/db.ts index 31258ca..d6ba3d7 100644 --- a/src/db.ts +++ b/src/db.ts @@ -222,8 +222,8 @@ export function createIndexes(db: CodemapDatabase) { CREATE INDEX IF NOT EXISTS idx_calls_callee ON calls(callee_name, file_path); CREATE INDEX IF NOT EXISTS idx_calls_file ON calls(file_path); - -- Mirrors the typical join shape symbols.{file_path,name,line_start}; - -- the (file_path, name) prefix also covers GROUP BY file_path scans + -- Mirrors the typical join shape symbols.(file_path, name, line_start). + -- The (file_path, name) prefix also covers GROUP BY file_path scans -- used by the bundled files-by-coverage recipe (D2 + D13). CREATE INDEX IF NOT EXISTS idx_coverage_file_name ON coverage(file_path, name); `); From a342d482b628a6a9c5a7dc7d4bbddcd8fc22a076 Mon Sep 17 00:00:00 2001 From: Sutu Sebastian Date: Mon, 4 May 2026 11:58:46 +0300 Subject: [PATCH 08/10] =?UTF-8?q?fix(coverage):=20apply=20CodeRabbit=20rev?= =?UTF-8?q?iew=20on=20PR=20#57=20=E2=80=94=203=20valid=20threads?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All three CodeRabbit comments fact-check as ✅ Correct. Apply each: 1. **architecture.md:25 — broken anchor** `#static-coverage-ingestion` doesn't exist (the section heading is `### coverage — Statement coverage…`, not "Static coverage ingestion"). Fix: drop the parenthetical anchor link, replace with "schema in § Schema → coverage" pointing at the existing § Schema anchor that does exist. 2. **fixtures/minimal/README.md:24 — overstated deprecation focus.** Said "exercise the join axis against `@deprecated` symbols" but only 1 of 6 golden rows is `@deprecated` (legacyClient); the others (`FormatPrice`, `run`, `_epochSeconds`, `nanoseconds`, `_hiResEpoch`) carry `@internal`/`@alpha`/`@private`/no tag. Reword to "across exported functions of every visibility tag" — accurate and tag-neutral. 3. 
**untested-and-dead.md:21 — SQL precedence bug in narrowing pattern.** `AND s.visibility IS NULL OR s.visibility = 'public'` parses as `(... AND s.visibility IS NULL) OR (s.visibility = 'public')` per SQL precedence (AND binds tighter than OR). Every row with `visibility = 'public'` would bypass every other WHERE predicate — the agent following this pattern would get a much larger result set than intended. Fix: wrap in parentheses + explicit comment that the parens are load-bearing so future edits don't drop them. files-hashes.json refreshed for the README touch. --- docs/architecture.md | 14 ++++----- fixtures/golden/minimal/files-hashes.json | 2 +- fixtures/minimal/README.md | 36 +++++++++++------------ templates/recipes/untested-and-dead.md | 2 +- 4 files changed, 27 insertions(+), 27 deletions(-) diff --git a/docs/architecture.md b/docs/architecture.md index 9a14375..e257837 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -16,13 +16,13 @@ A local SQLite database (`.codemap/index.db`) indexes the project tree and store ## Layering -| Layer | Role | -| -------------------------------------------- | 
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| **`cli/`** (`bootstrap`, `main`, `cmd-*`) | Parses argv; **dynamic `import()`** loads only the command chunk (`cmd-index`, `cmd-query`, `cmd-agents`) so `--help` / `version` / `agents init` avoid the indexer. | -| **`api.ts`** | Public programmatic surface: `createCodemap()`, `Codemap` (`query`, `index`), re-exports `runCodemapIndex` for advanced use. 
| -| **`application/`** | Pure transport-agnostic engines + handlers: `run-index.ts` / `index-engine.ts` (orchestration + indexing); `query-engine.ts` (`executeQuery` / `executeQueryBatch`); `audit-engine.ts` (`runAudit` + `resolveAuditBaselines` + `runAuditFromRef` + `makeWorktreeReindex`); `audit-worktree.ts` (sha-keyed cache + atomic populate); `context-engine.ts` (`buildContextEnvelope`); `validate-engine.ts` (`computeValidateRows` + `toProjectRelative`); `show-engine.ts` (lookup + envelope builders); `impact-engine.ts` (`findImpact` — graph blast-radius walker); `coverage-engine.ts` (`upsertCoverageRows` core + `ingestIstanbul` / `ingestLcov` parsers — see [§ Static coverage ingestion](#static-coverage-ingestion)); `query-recipes.ts` + `recipes-loader.ts` (recipe registry); `output-formatters.ts` (SARIF + GH annotations); `watcher.ts` (chokidar-backed debounced reindex; pure helpers + injectable backend); `tool-handlers.ts` + `resource-handlers.ts` (transport-agnostic tool / resource handlers shared by MCP + HTTP); `mcp-server.ts` (MCP transport — stdio); `http-server.ts` (HTTP transport — `node:http`). Engines depend on `db.ts` / `runtime.ts`; **never** on `cli/`. | -| **`adapters/`** | `LanguageAdapter` registry; built-ins call `parser.ts` / `css-parser.ts` / `markers.ts` from `parse-worker-core`. | -| **`runtime.ts` / `config.ts` / `db.ts` / …** | Config, SQLite, resolver, workers. 
| +| Layer | Role | +| -------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| **`cli/`** (`bootstrap`, `main`, `cmd-*`) | Parses argv; **dynamic `import()`** loads only the command chunk (`cmd-index`, `cmd-query`, `cmd-agents`) so `--help` / `version` / `agents init` avoid the indexer. | +| **`api.ts`** | Public programmatic surface: `createCodemap()`, `Codemap` (`query`, `index`), re-exports `runCodemapIndex` for advanced use. 
| +| **`application/`** | Pure transport-agnostic engines + handlers: `run-index.ts` / `index-engine.ts` (orchestration + indexing); `query-engine.ts` (`executeQuery` / `executeQueryBatch`); `audit-engine.ts` (`runAudit` + `resolveAuditBaselines` + `runAuditFromRef` + `makeWorktreeReindex`); `audit-worktree.ts` (sha-keyed cache + atomic populate); `context-engine.ts` (`buildContextEnvelope`); `validate-engine.ts` (`computeValidateRows` + `toProjectRelative`); `show-engine.ts` (lookup + envelope builders); `impact-engine.ts` (`findImpact` — graph blast-radius walker); `coverage-engine.ts` (`upsertCoverageRows` core + `ingestIstanbul` / `ingestLcov` parsers; schema in [§ Schema → coverage](#schema)); `query-recipes.ts` + `recipes-loader.ts` (recipe registry); `output-formatters.ts` (SARIF + GH annotations); `watcher.ts` (chokidar-backed debounced reindex; pure helpers + injectable backend); `tool-handlers.ts` + `resource-handlers.ts` (transport-agnostic tool / resource handlers shared by MCP + HTTP); `mcp-server.ts` (MCP transport — stdio); `http-server.ts` (HTTP transport — `node:http`). Engines depend on `db.ts` / `runtime.ts`; **never** on `cli/`. | +| **`adapters/`** | `LanguageAdapter` registry; built-ins call `parser.ts` / `css-parser.ts` / `markers.ts` from `parse-worker-core`. | +| **`runtime.ts` / `config.ts` / `db.ts` / …** | Config, SQLite, resolver, workers. | `index.ts` is the package entry: re-exports the public API and runs `cli/main` only when executed as the main module (Node/Bun `codemap` binary). 
diff --git a/fixtures/golden/minimal/files-hashes.json b/fixtures/golden/minimal/files-hashes.json index b5a8ab2..5e7d373 100644 --- a/fixtures/golden/minimal/files-hashes.json +++ b/fixtures/golden/minimal/files-hashes.json @@ -7,7 +7,7 @@ }, { "path": "README.md", - "content_hash": "6333c6bbc9240f2df77da0b60e4c8653a20df7c116531616c1b25d6ceb1521b6", + "content_hash": "05f60d056b6a09c6d2024be74cd45e17f40b1126c408b67a42bc098f5159d13e", "language": "md", "line_count": 50 }, diff --git a/fixtures/minimal/README.md b/fixtures/minimal/README.md index a08e4df..aea44ec 100644 --- a/fixtures/minimal/README.md +++ b/fixtures/minimal/README.md @@ -4,24 +4,24 @@ Stable tree exercising every codemap surface — used by `src/benchmark.ts`, gol ## What's exercised -| Codemap surface | Fixture coverage | -| --------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `symbols` (function / const / interface / class) | `usePermissions`, `createClient`, `setupTransport`, `openSocket`, `handshake`, `legacyClient`, `now`, `nanoseconds`, `_epochSeconds`, `_hiResEpoch`, `epochMs`, `nowIso`, `FormatPrice`, `ShopButton`, `ProductCard`, `get`, `invalidate`, `read`, `write`, `run` | -| `imports` / `exports` (named + default + re-export) | `consumer.ts` named imports; `components/shop/index.ts` barrel re-exports; `ShopButton.default.ts` default export | -| `dependencies` (resolved file→file edges) | TS imports across `api/`, `lib/`, `components/shop/`, `utils/`, `usePermissions` | -| `components` (React) | `ShopButton`, `ProductCard` (both call `usePermissions` — fan-in) | -| `calls` (caller→callee, depth >1, with cycle) | `run → createClient → setupTransport → openSocket → handshake`; non-cyclic `cache.get → store.read`; 2-node 
cycle `cache.invalidate ↔ store.write` | -| `markers` (TODO / FIXME / HACK / NOTE) | `notes.md` + `consumer.ts` (`XXX` is not yet a recognised kind) | -| `type_members` | `ClientConfig`, `Transport`, `ProductCardProps` | -| Visibility tags (`@internal` / `@beta` / `@alpha` / `@private`) | `_epochSeconds`, `nowIso`, `nanoseconds`, `_hiResEpoch` | -| `@deprecated` | `now`, `legacyClient`, `epochMs` (3 rows for SARIF / GH-annotations) | -| `css_variables` | `theme.css` (`--color-brand`, `--spacing-md`) | -| `css_classes` | `theme.css` (`.container`), `button.module.css` (`.primary`) | -| `css_keyframes` | `button.module.css` (`fadeIn`) | -| `--group-by owner` | `CODEOWNERS` (4 owners) | -| Project-local recipes | `.codemap/recipes/shop-symbols.{sql,md}` (with frontmatter actions) — file shape valid; loader currently runs at parse time before bootstrap, so `--recipe shop-symbols` is rejected as "unknown" until that's deferred to the runner (known limitation) | -| Self-managed `.gitignore` | `.codemap/.gitignore` (codemap-managed) | -| `coverage` (Istanbul + LCOV ingest) | `coverage/coverage-final.json` (Istanbul) + `coverage/lcov.info` (LCOV) — equivalent partial coverage shape; bundled recipes `untested-and-dead`, `files-by-coverage`, `worst-covered-exports` exercise the join axis against `@deprecated` symbols | +| Codemap surface | Fixture coverage | +| --------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `symbols` (function / const / interface / class) | `usePermissions`, `createClient`, `setupTransport`, `openSocket`, `handshake`, `legacyClient`, `now`, `nanoseconds`, `_epochSeconds`, `_hiResEpoch`, `epochMs`, 
`nowIso`, `FormatPrice`, `ShopButton`, `ProductCard`, `get`, `invalidate`, `read`, `write`, `run` | +| `imports` / `exports` (named + default + re-export) | `consumer.ts` named imports; `components/shop/index.ts` barrel re-exports; `ShopButton.default.ts` default export | +| `dependencies` (resolved file→file edges) | TS imports across `api/`, `lib/`, `components/shop/`, `utils/`, `usePermissions` | +| `components` (React) | `ShopButton`, `ProductCard` (both call `usePermissions` — fan-in) | +| `calls` (caller→callee, depth >1, with cycle) | `run → createClient → setupTransport → openSocket → handshake`; non-cyclic `cache.get → store.read`; 2-node cycle `cache.invalidate ↔ store.write` | +| `markers` (TODO / FIXME / HACK / NOTE) | `notes.md` + `consumer.ts` (`XXX` is not yet a recognised kind) | +| `type_members` | `ClientConfig`, `Transport`, `ProductCardProps` | +| Visibility tags (`@internal` / `@beta` / `@alpha` / `@private`) | `_epochSeconds`, `nowIso`, `nanoseconds`, `_hiResEpoch` | +| `@deprecated` | `now`, `legacyClient`, `epochMs` (3 rows for SARIF / GH-annotations) | +| `css_variables` | `theme.css` (`--color-brand`, `--spacing-md`) | +| `css_classes` | `theme.css` (`.container`), `button.module.css` (`.primary`) | +| `css_keyframes` | `button.module.css` (`fadeIn`) | +| `--group-by owner` | `CODEOWNERS` (4 owners) | +| Project-local recipes | `.codemap/recipes/shop-symbols.{sql,md}` (with frontmatter actions) — file shape valid; loader currently runs at parse time before bootstrap, so `--recipe shop-symbols` is rejected as "unknown" until that's deferred to the runner (known limitation) | +| Self-managed `.gitignore` | `.codemap/.gitignore` (codemap-managed) | +| `coverage` (Istanbul + LCOV ingest) | `coverage/coverage-final.json` (Istanbul) + `coverage/lcov.info` (LCOV) — equivalent partial coverage shape; bundled recipes `untested-and-dead`, `files-by-coverage`, `worst-covered-exports` exercise the `coverage ↔ symbols` join across exported functions of 
every visibility tag (`@deprecated`, `@internal`, `@alpha`, `@private`, untagged) | ## Use diff --git a/templates/recipes/untested-and-dead.md b/templates/recipes/untested-and-dead.md index 668a6da..513eaeb 100644 --- a/templates/recipes/untested-and-dead.md +++ b/templates/recipes/untested-and-dead.md @@ -18,4 +18,4 @@ Returns nothing useful until you've run `codemap ingest-coverage Date: Mon, 4 May 2026 12:05:00 +0300 Subject: [PATCH 09/10] =?UTF-8?q?fix(docs):=20bump=20architecture.md=20sch?= =?UTF-8?q?ema=20version=20mention=205=20=E2=86=92=206?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CodeRabbit outside-diff comment on PR #57 (architecture.md:182) — caught that I bumped SCHEMA_VERSION 5 → 6 in db.ts (Tracer 1) but left the human-readable callout in architecture.md unchanged. Now in sync. Per `docs/README.md` Rule 6, "schema version" is explicitly listed as a decision value (not inventory) so the hardcoded number is fine — it just needs to track the constant. --- docs/architecture.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/architecture.md b/docs/architecture.md index e257837..1cfa670 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -179,7 +179,7 @@ Optional **`/config.{ts,js,json}`** (default `.codemap/config.*`; def **Fresh database:** the default CLI **`codemap`** (incremental) calls **`createSchema()`** in **`runCodemapIndex`** before **`getChangedFiles()`**, so the **`meta`** table exists before **`getMeta(..., "last_indexed_commit")`** runs on an empty **`.codemap/index.db`**. -Current schema version: **5** — see [Schema Versioning](#schema-versioning) for details. +Current schema version: **6** — see [Schema Versioning](#schema-versioning) for details. All tables use `STRICT` mode. Tables marked with `WITHOUT ROWID` store data directly in the primary key B-tree. PRAGMAs and index design: [SQLite Performance Configuration](#sqlite-performance-configuration). 
From 58ae24d9042327a471e48d0dd44d382cd2c41e8a Mon Sep 17 00:00:00 2001 From: Sutu Sebastian Date: Mon, 4 May 2026 12:07:33 +0300 Subject: [PATCH 10/10] docs(lessons): always construct gh body args via temp file, never heredoc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PR #57's body shipped with literal backslash-backtick artifacts everywhere because the heredoc-into-`gh pr create --body` path shell-escaped every backtick. Add it to .agents/lessons.md alongside the existing template- literal backtick lesson — same root cause family (backticks + nested quoting), different surface (gh CLI vs TS template strings). Pattern: Write body → temp file → `gh pr --body-file ` → delete. One extra tool call, zero rendering surprises. --- .agents/lessons.md | 1 + 1 file changed, 1 insertion(+) diff --git a/.agents/lessons.md b/.agents/lessons.md index ba03283..d749369 100644 --- a/.agents/lessons.md +++ b/.agents/lessons.md @@ -12,3 +12,4 @@ Each entry is a single bullet: `- **** — `. Newest entries at t - **agent rule + skill maintenance** — when shipping a CLI flag, recipe id, recipe `actions` template, schema column, or any agent-queryable surface, update **both** copies of the codemap rule + skill in the **same PR** per [docs/README.md Rule 10](../docs/README.md): `templates/agents/rules/codemap.md` + `templates/agents/skills/codemap/SKILL.md` (ships to npm) **and** `.agents/rules/codemap.md` + `.agents/skills/codemap/SKILL.md` (this clone's mirror). Drift between the two pairs should be CLI-prefix-only (`codemap` vs `bun src/index.ts`). Forgetting this leaves installed agents with a stale view of the CLI — that's how `--summary` / `--changed-since` / `--group-by` / `actions` / `symbols.visibility` shipped without any `templates/agents/` mention until PR #29 retro-fixed it. - **backticks inside SQL or help-text template literals** — never put a literal backtick inside a `` `...` `` template-literal string. 
`db.ts` SQL DDL strings (multi-line CREATE TABLE templates) and `printQueryCmdHelp()` (multi-line help text) are both `` `...` `` template literals; an inner backtick — typically a Markdown-style code-fence around a flag like `` `--full` `` — terminates the literal early and the parser blows up several lines later with cryptic "expected `,` or `)`" errors. **Use plain prose in those strings** (`--full` not `` `--full` ``), or escape (`` \` ``) if you really need the character. Hit twice (B.7 + B.6 PR #30); the lesson is general — applies to any TS template literal that gets pasted prose later, not just SQL / help text. - **STOP-before-Grep applies to symbol lookups too** — `Grep` for symbol names like `printQueryResult`, `getCurrentCommit`, `dropAll` violates the [`codemap` rule](rules/codemap.md). The codemap query `SELECT file_path, line_start FROM symbols WHERE name = ''` answers it faster and without scanning. Reach for `Grep` only when the question is content-shaped (regex over file bodies, finding pattern usages inside function bodies, etc.) — not when it's "where is X defined / who calls X / what does file Y export." This was a PR #30 self-correction. +- **PR / issue / comment bodies always go through a temp file** — never pass markdown bodies via shell heredoc to `gh pr create --body "$(cat <<'EOF'…)"` / `gh pr edit --body …` / `gh pr comment --body …` / `gh issue create --body …` / `gh api` `--field body=…`. Backticks inside the heredoc (every code span and code fence) get shell-escaped to `` \` `` and render literally on GitHub — every recipe id, file path, flag, SQL fragment, and code fence in the rendered body comes out as `` \`coverage\` `` instead of `coverage`. Pattern: write the body to a temp file (`Write` to `/tmp/pr--body.md`), pass `--body-file /tmp/pr--body.md`, then delete the temp file. Cost is one extra tool call; saves redoing every PR body that has more than a few backticks. 
Hit on PR #57 — final body was a wall of `` \` `` artifacts until rewritten via temp file.