EntityProcess · christso · May 21, 2026 · May 21, 2026 · May 21, 2026 · May 21, 2026
diff --git a/apps/cli/src/commands/results/remote.ts b/apps/cli/src/commands/results/remote.ts
@@ -8,7 +8,9 @@ import {
   directPushResults,
   directorySizeBytes,
   getResultsRepoStatus,
+  listGitRuns,
   loadConfig,
+  normalizeResultsConfig,
   resolveResultsRepoRunsDir,
   syncResultsRepo,
 } from '@agentv/core';
@@ -59,15 +61,6 @@ function getStatusMessage(error: unknown): string {
   return error instanceof Error ? error.message : String(error);
 }
 
-function normalizeResultsConfig(config: ResultsConfig): Required<ResultsConfig> {
-  return {
-    repo: config.repo,
-    path: config.path,
-    auto_push: config.auto_push === true,
-    branch_prefix: config.branch_prefix?.trim() || 'eval-results',
-  };
-}
-
 function statusForResult(result: EvaluationResult): 'PASS' | 'FAIL' | 'ERROR' {
   if (result.executionStatus === 'execution_error' || result.error) {
     return 'ERROR';
@@ -185,15 +178,45 @@ export async function listMergedResultFiles(
     };
   }
 
-  const remoteRuns = listResultFilesFromRunsDir(resolveResultsRepoRunsDir(config)).map(
-    (meta) =>
-      ({
-        ...meta,
-        filename: encodeRemoteRunId(meta.filename),
-        raw_filename: meta.filename,
+  let remoteRuns: SourcedResultFileMeta[] = [];
+  if (config.mode === 'github') {
+    try {
+      const gitRuns = await listGitRuns(resolveResultsRepoRunsDir(config));
+      remoteRuns = gitRuns.map((r) => ({
+        filename: encodeRemoteRunId(r.run_id),
+        raw_filename: r.run_id,
         source: 'remote' as const,
-      }) satisfies SourcedResultFileMeta,
-  );
+        path: r.manifest_path,
+        displayName: r.display_name,
+        timestamp: r.timestamp,
+        testCount: r.test_count,
+        passRate: r.pass_rate || 0,
+        avgScore: r.avg_score || 0,
+        sizeBytes: r.size_bytes || 0,
+      }));
+    } catch (error) {
+      console.error('git-native listing failed, falling back', error);
+      remoteRuns = listResultFilesFromRunsDir(resolveResultsRepoRunsDir(config)).map(
+        (meta) =>
+          ({
+            ...meta,
+            filename: encodeRemoteRunId(meta.filename),
+            raw_filename: meta.filename,
+            source: 'remote' as const,
+          }) satisfies SourcedResultFileMeta,
+      );
+    }
+  } else {
+    remoteRuns = listResultFilesFromRunsDir(resolveResultsRepoRunsDir(config)).map(
+      (meta) =>
+        ({
+          ...meta,
+          filename: encodeRemoteRunId(meta.filename),
+          raw_filename: meta.filename,
+          source: 'remote' as const,
+        }) satisfies SourcedResultFileMeta,
+    );
+  }
 
   const merged = [...localRuns, ...remoteRuns].sort((a, b) =>
     b.timestamp.localeCompare(a.timestamp),

diff --git a/apps/cli/test/commands/results/serve.test.ts b/apps/cli/test/commands/results/serve.test.ts
@@ -501,18 +501,15 @@ describe('serve app', () => {
         writeFileSync(
           path.join(tempDir, '.agentv', 'config.yaml'),
           `results:
+  mode: github
   repo: EntityProcess/agentv-evals
-  path: autopilot-dev/runs
 `,
         );
 
         const remoteRunDir = path.join(
           process.env.AGENTV_HOME,
-          'cache',
-          'results-repo',
+          'results',
           'EntityProcess-agentv-evals',
-          'repo',
-          'autopilot-dev',
           'runs',
           'default',
           '2026-03-26T10-00-00-000Z',
@@ -581,29 +578,46 @@ describe('serve app', () => {
 
   describe('GET /api/remote/status', () => {
     it('reports configured remote status with graceful local-only fallback', async () => {
-      mkdirSync(path.join(tempDir, '.agentv'), { recursive: true });
-      writeFileSync(
-        path.join(tempDir, '.agentv', 'config.yaml'),
-        `results:
+      // Isolate AGENTV_HOME so the reported local path is deterministic for this test.
+      const previousHome = process.env.AGENTV_HOME;
+      process.env.AGENTV_HOME = path.join(tempDir, 'agentv-home-status');
+
+      try {
+        mkdirSync(path.join(tempDir, '.agentv'), { recursive: true });
+        writeFileSync(
+          path.join(tempDir, '.agentv', 'config.yaml'),
+          `results:
+  mode: github
   repo: EntityProcess/agentv-evals
-  path: autopilot-dev/runs
 `,
-      );
+        );
 
-      const app = createApp([], tempDir, tempDir, undefined, { studioDir });
-      const res = await app.request('/api/remote/status');
+        const app = createApp([], tempDir, tempDir, undefined, { studioDir });
+        const res = await app.request('/api/remote/status');
 
-      expect(res.status).toBe(200);
-      const data = (await res.json()) as {
-        configured: boolean;
-        available: boolean;
-        repo: string;
-        path: string;
-      };
-      expect(data.configured).toBe(true);
-      expect(data.available).toBe(false);
-      expect(data.repo).toBe('EntityProcess/agentv-evals');
-      expect(data.path).toBe('autopilot-dev/runs');
+        expect(res.status).toBe(200);
+        const data = (await res.json()) as {
+          configured: boolean;
+          available: boolean;
+          repo: string;
+          path: string;
+        };
+        expect(data.configured).toBe(true);
+        expect(data.available).toBe(false);
+        expect(data.repo).toBe('EntityProcess/agentv-evals');
+        expect(data.path).toBe(
+          path.join(tempDir, 'agentv-home-status', 'results', 'EntityProcess-agentv-evals'),
+        );
+      } finally {
+        // Restore the caller's AGENTV_HOME. Remove the key rather than assigning
+        // `undefined`: Node coerces env values to strings, which would leave the
+        // literal "undefined" behind for later tests.
+        if (previousHome === undefined) {
+          Reflect.deleteProperty(process.env, 'AGENTV_HOME');
+        } else {
+          process.env.AGENTV_HOME = previousHome;
+        }
+      }
     });
   });
 

diff --git a/apps/studio/src/lib/types.ts b/apps/studio/src/lib/types.ts
@@ -257,7 +257,7 @@ export interface RemoteStatusResponse {
   configured: boolean;
   available: boolean;
   repo?: string;
-  cache_dir?: string;
+  local_dir?: string;
   path?: string;
   auto_push?: boolean;
   branch_prefix?: string;

diff --git a/docs/plans/git-native-results-goal.md b/docs/plans/git-native-results-goal.md
@@ -0,0 +1,42 @@
+# Goal: Complete git-native-results PR (#1261)
+
+## Objective
+Implement the git-native results storage architecture and land PR #1261 as a clean, tested, manually verified change.
+
+## Success Criteria
+- All implementation passes completed per design doc
+- Full test suite green (new unit + integration tests, plus the existing 1782 core and 553 CLI tests)
+- E2E manual test using agent-browser against real test results repo
+- Red/green UAT documented before review
+- No regressions
+
+## Work Location
+- Worktree: `agentv.worktrees/git-native-results/`
+- Branch: `feat/git-native-results`
+
+## Key Decisions Confirmed
+- Dedicated results repo model → write directly to `main` of results repo (no separate branch needed)
+- Use raw `git` subprocess (not go-git) for the `ls-tree` / `cat-file` read path
+- Follow exact order in design doc
+
+## Non-Goals
+- P5 zero-config mode
+- Caching
+- Multi-mode beyond github
+
+## Verification
+1. Automated tests
+2. Manual agent-browser E2E in Studio
+3. Performance check against a results repo with 500+ runs
+4. Lint + typecheck clean
+
+Owner: Agent + Chris T
+
+## Latest Progress (2026-05-21)
+
+- Docker ownership fix implemented in docker-compose.yml (`user: "${UID}:${GID}"`)
+- Write path (`commitAndPushRun`) largely complete via parallel work
+- Read path functional but needs hardening
+- Bun dependencies reinstalled in worktree
+- GitHub Actions CI currently failing on dependency resolution
+- Next focus: Fix CI, add tests, implement pagination
diff --git a/docs/plans/git-native-results.md b/docs/plans/git-native-results.md
@@ -0,0 +1,162 @@
+# Git-native results storage
+
+**Status**: design approved, implementation pending
+**Tracks**: issue #1259 (supersedes closed PR #1260)
+**Scope**: single PR; breaking changes accepted (no production users yet)
+
+---
+
+## Why
+
+`/api/runs` polls every 5s and does O(N) per-manifest reads (`readdir` + `statSync` + `loadResultFile` per run). At hundreds of runs it stalls; at thousands it falls over. The original PR #1260 tried to fix this with an append-only `index/runs.jsonl` file, which works but adds a second source of truth that can drift, grows forever, and requires a sha-amend dance plus a `reindex` migration command.
+
+After comparing with **entireio** (single-ref + git tree as index) and **skillfully** (explicit `sourceMode = github_import` pattern with PR-based writes for human-curated content), the cleaner architecture treats **git as the canonical store**, not as a transport layer.
+
+## Core idea
+
+The git tree IS the index. `git ls-tree -r origin/main -- runs/` lists every run path without reading any blob. `git cat-file --batch` reads existing `benchmark.json` blobs in one subprocess call. No separate index file. No drift. Natural pruning when runs are deleted. With `--filter=blob:none` clone, individual run blobs are only fetched lazily when a user opens the detail view.
+
+## Architecture
+
+### Storage
+
+- The configured remote `results.repo` is **the** storage location.
+- The local clone at `results.path` (filesystem path) is the working copy.
+- No more `.agentv/results/runs/` writes in the source project. No more gitignored results.
+
+```yaml
+# config.yaml
+results:
+  mode: github                       # required, only valid value today
+  repo: myorg/eval-results           # remote
+  path: ~/data/agentv-results        # optional; default ~/.agentv/results/<slug>/
+  auto_push: true                    # default
+```
+
+`mode: github` is explicit (extension point; mirrors skillfully's `sourceMode` pattern). `path` is the **local filesystem location** of the clone (breaking change — was previously the subdir within the remote repo). Runs always land at `<clone>/runs/<experiment>/<timestamp>/` regardless.
+
+### Writes
+
+Every `agentv eval` is one atomic operation:
+
+1. `git fetch origin --prune` (refresh; no checkout)
+2. Write artifacts into working tree at `<clone>/runs/<experiment>/<timestamp>/`
+3. `git add runs/<experiment>/<timestamp>/`
+4. `git commit -m "<title>" -m "Agentv-Run: <run-id>"` (P6 trailer baked in)
+5. If `auto_push`: `git push origin HEAD:main` with retry-on-non-fast-forward (rebase + retry)
+
+Each run is one commit. Files are unique to that run, so rebases never content-conflict.
+
+### Reads
+
+**Listing** (replaces `listResultFilesFromRunsDir`):
+- `git ls-tree -r origin/main -- runs/` → filter for `benchmark.json` paths
+- `git cat-file --batch` → read those blobs in one subprocess
+- Derive `run_id` from path (same logic as current `buildRunId`)
+- Sort by timestamp descending
+- Apply cursor pagination
+
+**Detail view file reads** (replaces `readFileSync(meta.path)`):
+- Committed: `git cat-file -p origin/main:runs/.../<file>`
+- In-progress (post-write, pre-commit): `readFileSync(<path>)` from working tree
+
+**In-progress detection**: between artifact write and commit, files exist only in the working tree. `git status --porcelain runs/` surfaces them; merge with the committed list for the Studio runs view.
+
+### Sync
+
+- `agentv eval` does its own fetch + push (no separate sync needed for own work)
+- `agentv results sync` = `git fetch origin --prune` (refresh view of others' work)
+- No more `git checkout`, no more `git pull --ff-only`
+- Studio polls `/api/runs`, which reads from the git object DB (already current after the most recent fetch)
+
+### Pagination
+
+`/api/runs?limit=50&cursor=<run_id>`:
+- Cursor is the `run_id` of the last item from the previous page
+- Server reads the full sorted list (one `git ls-tree` + one `git cat-file --batch`), finds the cursor, slices `[cursorIdx+1 : cursorIdx+1+limit]`, returns `next_cursor` if more remain
+- Studio uses `useInfiniteQuery` + an `IntersectionObserver` sentinel row
+
+## Implementation passes
+
+The PR is large but bounded. Suggested order within the single PR:
+
+### Pass 1 — config + paths
+
+- Update `ResultsConfig` schema: require `mode: github`, repurpose `path` as filesystem location
+- Rename `getResultsRepoCachePaths` → `getResultsRepoLocalPaths`
+- Rename `cache_dir` → `local_dir` in `ResultsRepoStatus` (wire format too)
+- Add config validation: refuse old-style `path: runs` values with migration message
+
+### Pass 2 — write path
+
+- Replace `.agentv/results/runs/` writes with direct writes to `<results.path>/runs/...`
+- `directPushResults` becomes the only write path (rename to `commitAndPushRun` since it's no longer just a "direct push" mode)
+- Add `Agentv-Run:` commit trailer
+- Drop `git checkout` from `updateCacheRepo` — only `git fetch --prune` remains
+- Rename `updateCacheRepo` → `fetchResultsRepo`
+
+### Pass 3 — read path
+
+- New `listResultFilesFromGitTree(repoDir, baseBranch)` using `git ls-tree` + `git cat-file --batch` on `benchmark.json` blobs
+- Replace `listResultFilesFromRunsDir` calls for remote runs with the new function
+- Detail view reads in `serve.ts` use `git cat-file -p <ref>:<path>` for committed runs
+- Working-tree readdir for in-progress runs (detected via `git status --porcelain`)
+- Drop `loadLightweightResults` enrichment loop in `handleRuns` — `benchmark.json` already has `target`, `experiment`, and `pass_rate`
+
+### Pass 4 — pagination
+
+- `/api/runs` accepts `limit` and `cursor` query params
+- Server slices the sorted list by cursor, returns `next_cursor`
+- `RunListResponse` gets `next_cursor?: string`
+- Studio: `runListOptions` → `infiniteQueryOptions`
+- `RunList.tsx`: flatten pages, add `IntersectionObserver` sentinel
+
+### Pass 5 — cleanup
+
+- Remove the entire P1 PR scope (closed PR #1260): `RunIndexEntry`, `appendToRunIndex`, `readRunIndex`, `reindexResultsRepo`, `agentv results reindex` command, `index/runs.jsonl` writes
+- Remove `localResults` listing — local-only mode is no longer supported
+- Remove `SourcedResultFileMeta.source` field — runs are no longer "local" or "remote", they're either committed or in-progress
+- Update docs site (`apps/web/src/content/docs/`)
+- Update skill files (`plugins/agentv-dev/skills/agentv-eval-builder/`)
+- Update examples that hardcoded `.agentv/results/runs/` paths
+
+## Breaking changes
+
+| Change | Impact |
+|--------|--------|
+| `results.repo` becomes required | Users without a results repo can't run evals until they configure one |
+| `results.path` repurposed (subdir → filesystem path) | Existing configs with `path: runs` fail loudly with migration message |
+| No more `.agentv/results/runs/` writes | Project-local results no longer exist; everything lives in the configured `path` |
+| `cache_dir` → `local_dir` in status responses | Studio + any external scripts reading status need to update |
+| `SourcedResultFileMeta.source` removed | Studio "source" badge becomes "in progress / shared" |
+
+Breaking changes accepted because no production users yet. Document in release notes; require fresh config to upgrade.
+
+## Test plan
+
+- Unit tests for `git ls-tree` + `git cat-file --batch` parsing helpers
+- Integration test that spins up a tmp git repo, writes runs via the new write path, lists via the new read path, asserts results
+- Pagination unit tests (cursor in/out of bounds, exact-boundary cases)
+- E2E: run an actual eval against a real (test-scoped) results repo, verify the commit lands with the `Agentv-Run:` trailer, `git ls-tree` shows the run, Studio renders it
+
+## Deferred to future PRs
+
+- **P5 zero-config same-repo mode** — write to `refs/agentv/runs/v1` in the source repo when no `results.repo` is configured. Independent feature; design pattern works the same.
+- **Multi-mode support** — if a cloud Studio gets built later, `mode: cloud` would mirror skillfully's "managed in Skillfully" mode. The current explicit `mode: github` field is the extension point.
+- **PR-based publishing** — for human-curated content. Eval results are machine-generated, so direct commit is correct. If users want review-before-merge for sensitive evals (e.g., regulatory benchmarks), add `share: auto-pr` later.
+- **In-memory list caching** — P2 from #1259. The git-object-DB read path is fast enough that caching is not needed today. Revisit if profiling shows it's a bottleneck.
+
+## Open implementation questions
+
+1. **Branch model**: `origin/main` or a dedicated `origin/agentv-runs/main`? Current vote: `main`, since this is a dedicated results repo.
+2. **What to do on `git fetch` failures during `agentv eval`**? Current vote: warn, proceed with stale local state, surface the error in Studio. Don't block the eval — local commit always works.
+3. **`gh` CLI dependency**: stays scoped to existing PR-related code paths. The new git-native flow uses raw `git` only.
+
+## What this PR does NOT do
+
+- Doesn't add a separate index file (the index IS the git tree)
+- Doesn't ship a `reindex` migration command (nothing to backfill — `benchmark.json` already exists per run)
+- Doesn't change the artifact format (`benchmark.json`, `index.jsonl`, per-test dirs stay as-is)
+- Doesn't add server-side caching (deferred)
+- Doesn't add PR-based publishing (deferred)
+- Doesn't touch the source repo's commit history (only the configured `results.repo`)