diff --git a/apps/cli/src/commands/results/remote.ts b/apps/cli/src/commands/results/remote.ts index 7e966d58..2fcc4a7e 100644 --- a/apps/cli/src/commands/results/remote.ts +++ b/apps/cli/src/commands/results/remote.ts @@ -3,7 +3,7 @@ import path from 'node:path'; import { DEFAULT_THRESHOLD, type EvaluationResult, - type ResultsExportConfig, + type ResultsConfig, type ResultsRepoStatus, directPushResults, directorySizeBytes, @@ -59,7 +59,7 @@ function getStatusMessage(error: unknown): string { return error instanceof Error ? error.message : String(error); } -function normalizeResultsExportConfig(config: ResultsExportConfig): Required { +function normalizeResultsConfig(config: ResultsConfig): Required { return { repo: config.repo, path: config.path, @@ -107,13 +107,13 @@ async function maybeWarnLargeArtifact(runDir: string): Promise { async function loadNormalizedResultsConfig( cwd: string, -): Promise | undefined> { +): Promise | undefined> { const repoRoot = (await findRepoRoot(cwd)) ?? cwd; const config = await loadConfig(path.join(cwd, '_'), repoRoot); - if (!config?.results?.export) { + if (!config?.results) { return undefined; } - return normalizeResultsExportConfig(config.results.export); + return normalizeResultsConfig(config.results); } export function encodeRemoteRunId(filename: string): string { diff --git a/apps/cli/test/commands/results/serve.test.ts b/apps/cli/test/commands/results/serve.test.ts index 094a44b2..75f286fb 100644 --- a/apps/cli/test/commands/results/serve.test.ts +++ b/apps/cli/test/commands/results/serve.test.ts @@ -501,9 +501,8 @@ describe('serve app', () => { writeFileSync( path.join(tempDir, '.agentv', 'config.yaml'), `results: - export: - repo: EntityProcess/agentv-evals - path: autopilot-dev/runs + repo: EntityProcess/agentv-evals + path: autopilot-dev/runs `, ); @@ -586,9 +585,8 @@ describe('serve app', () => { writeFileSync( path.join(tempDir, '.agentv', 'config.yaml'), `results: - export: - repo: EntityProcess/agentv-evals - path: autopilot-dev/runs + repo: EntityProcess/agentv-evals + path: autopilot-dev/runs `, ); diff --git a/apps/studio/src/components/RunSourceToolbar.tsx b/apps/studio/src/components/RunSourceToolbar.tsx index f28254bc..2aa7b6a5 100644 --- a/apps/studio/src/components/RunSourceToolbar.tsx +++ b/apps/studio/src/components/RunSourceToolbar.tsx @@ -82,7 +82,7 @@ export function RunSourceToolbar({ ) : filter === 'all' ? (

Remote results are not configured. Add{' '} - results.export to{' '} + results to{' '} .agentv/config.yaml to enable.

diff --git a/apps/studio/src/routes/index.tsx b/apps/studio/src/routes/index.tsx index 964df7ee..8461ab54 100644 --- a/apps/studio/src/routes/index.tsx +++ b/apps/studio/src/routes/index.tsx @@ -349,10 +349,7 @@ function RunsTabContent({ <>

Remote results are not configured.

- Add{' '} - - results.export - {' '} + Add results{' '} to{' '} .agentv/config.yaml diff --git a/apps/web/src/content/docs/docs/tools/studio.mdx b/apps/web/src/content/docs/docs/tools/studio.mdx index c026d6d1..d91a9acb 100644 --- a/apps/web/src/content/docs/docs/tools/studio.mdx +++ b/apps/web/src/content/docs/docs/tools/studio.mdx @@ -210,14 +210,13 @@ Studio can display runs pushed to a remote git repository by other machines or C ### Configuration -Add a `results.export` block to `.agentv/config.yaml`: +Add a `results` block to `.agentv/config.yaml`: ```yaml results: - export: - repo: EntityProcess/agentv-evals # GitHub repo (owner/repo or full URL) - path: runs # Directory within the repo - auto_push: true # Push directly to base branch after every eval run + repo: EntityProcess/agentv-evals # GitHub repo (owner/repo or full URL) + path: runs # Directory within the repo + auto_push: true # Push directly to base branch after every eval run ``` With `auto_push: true`, every `agentv eval` or `agentv pipeline bench` pushes results directly to the configured repo's base branch (e.g., `main`). Results appear immediately in Studio without requiring PR merges. diff --git a/packages/core/src/evaluation/loaders/config-loader.ts b/packages/core/src/evaluation/loaders/config-loader.ts index cf819f0d..b7603f2d 100644 --- a/packages/core/src/evaluation/loaders/config-loader.ts +++ b/packages/core/src/evaluation/loaders/config-loader.ts @@ -36,7 +36,7 @@ export type ExecutionDefaults = { readonly pool_slots?: number; }; -export type ResultsExportConfig = { +export type ResultsConfig = { readonly repo: string; readonly path: string; readonly auto_push?: boolean; @@ -52,9 +52,7 @@ export type AgentVConfig = { readonly required_version?: string; readonly eval_patterns?: readonly string[]; readonly execution?: ExecutionDefaults; - readonly results?: { - readonly export?: ResultsExportConfig; - }; + readonly results?: ResultsConfig; readonly hooks?: HooksConfig; }; @@ -560,10 +558,7 @@ export function parseExecutionDefaults( return Object.keys(result).length > 0 ? (result as ExecutionDefaults) : undefined; } -export function parseResultsConfig( - raw: unknown, - configPath: string, -): AgentVConfig['results'] | undefined { +export function parseResultsConfig(raw: unknown, configPath: string): ResultsConfig | undefined { if (raw === undefined || raw === null) { return undefined; } @@ -572,52 +567,29 @@ export function parseResultsConfig( return undefined; } - const obj = raw as Record; - const exportConfig = parseResultsExportConfig(obj.export, configPath); - if (!exportConfig) { - return undefined; - } - - return { export: exportConfig }; -} - -export function parseResultsExportConfig( - raw: unknown, - configPath: string, -): ResultsExportConfig | undefined { - if (raw === undefined || raw === null) { - return undefined; - } - if (typeof raw !== 'object' || Array.isArray(raw)) { - logWarning(`Invalid results.export in ${configPath}, expected object`); - return undefined; - } - const obj = raw as Record; const repo = typeof obj.repo === 'string' ? obj.repo.trim() : ''; - const exportPath = typeof obj.path === 'string' ? obj.path.trim() : ''; + const resultsPath = typeof obj.path === 'string' ? obj.path.trim() : ''; if (!repo) { - logWarning(`Invalid results.export.repo in ${configPath}, expected non-empty string`); + logWarning(`Invalid results.repo in ${configPath}, expected non-empty string`); return undefined; } - if (!exportPath) { - logWarning(`Invalid results.export.path in ${configPath}, expected non-empty string`); + if (!resultsPath) { + logWarning(`Invalid results.path in ${configPath}, expected non-empty string`); return undefined; } if (obj.auto_push !== undefined && typeof obj.auto_push !== 'boolean') { - logWarning(`Invalid results.export.auto_push in ${configPath}, expected boolean`); + logWarning(`Invalid results.auto_push in ${configPath}, expected boolean`); return undefined; } let branchPrefix: string | undefined; if (obj.branch_prefix !== undefined) { if (typeof obj.branch_prefix !== 'string' || obj.branch_prefix.trim().length === 0) { - logWarning( - `Invalid results.export.branch_prefix in ${configPath}, expected non-empty string`, - ); + logWarning(`Invalid results.branch_prefix in ${configPath}, expected non-empty string`); return undefined; } branchPrefix = obj.branch_prefix.trim(); @@ -625,7 +597,7 @@ export function parseResultsExportConfig( return { repo, - path: exportPath, + path: resultsPath, ...(typeof obj.auto_push === 'boolean' && { auto_push: obj.auto_push }), ...(branchPrefix && { branch_prefix: branchPrefix }), }; diff --git a/packages/core/src/evaluation/results-repo.ts b/packages/core/src/evaluation/results-repo.ts index 46bfa160..04419785 100644 --- a/packages/core/src/evaluation/results-repo.ts +++ b/packages/core/src/evaluation/results-repo.ts @@ -6,7 +6,7 @@ import path from 'node:path'; import { promisify } from 'node:util'; import { getAgentvHome } from '../paths.js'; -import type { ResultsExportConfig } from './loaders/config-loader.js'; +import type { ResultsConfig } from './loaders/config-loader.js'; const execFileAsync = promisify(execFile); @@ -61,9 +61,7 @@ function withFriendlyGitHubAuthError(error: unknown): Error { return new Error(message); } -export function normalizeResultsExportConfig( - config: ResultsExportConfig, -): Required { +export function normalizeResultsConfig(config: ResultsConfig): Required { return { repo: config.repo.trim(), path: config.path.trim().replace(/^\/+|\/+$/g, ''), @@ -172,7 +170,7 @@ async function updateCacheRepo(repoDir: string, baseBranch: string): Promise { - const normalized = normalizeResultsExportConfig(config); +export async function ensureResultsRepoClone(config: ResultsConfig): Promise { + const normalized = normalizeResultsConfig(config); const cachePaths = getResultsRepoCachePaths(normalized.repo); mkdirSync(cachePaths.rootDir, { recursive: true }); @@ -208,7 +206,7 @@ export async function ensureResultsRepoClone(config: ResultsExportConfig): Promi return cachePaths.repoDir; } -export function getResultsRepoStatus(config?: ResultsExportConfig): ResultsRepoStatus { +export function getResultsRepoStatus(config?: ResultsConfig): ResultsRepoStatus { if (!config) { return { configured: false, @@ -218,7 +216,7 @@ export function getResultsRepoStatus(config?: ResultsExportConfig): ResultsRepoS }; } - const normalized = normalizeResultsExportConfig(config); + const normalized = normalizeResultsConfig(config); const cachePaths = getResultsRepoCachePaths(normalized.repo); const persisted = readPersistedStatus(cachePaths.statusFile); @@ -235,8 +233,8 @@ export function getResultsRepoStatus(config?: ResultsExportConfig): ResultsRepoS }; } -export async function syncResultsRepo(config: ResultsExportConfig): Promise { - const normalized = normalizeResultsExportConfig(config); +export async function syncResultsRepo(config: ResultsConfig): Promise { + const normalized = normalizeResultsConfig(config); try { const repoDir = await ensureResultsRepoClone(normalized); @@ -257,10 +255,10 @@ export async function syncResultsRepo(config: ResultsExportConfig): Promise { - const normalized = normalizeResultsExportConfig(config); + const normalized = normalizeResultsConfig(config); const repoDir = await ensureResultsRepoClone(normalized); const baseBranch = await resolveDefaultBranch(repoDir); await updateCacheRepo(repoDir, baseBranch); @@ -274,10 +272,10 @@ export async function checkoutResultsRepoBranch( } export async function prepareResultsRepoBranch( - config: ResultsExportConfig, + config: ResultsConfig, branchName: string, ): Promise { - const normalized = normalizeResultsExportConfig(config); + const normalized = normalizeResultsConfig(config); const cloneDir = await ensureResultsRepoClone(normalized); const baseBranch = await resolveDefaultBranch(cloneDir); await updateCacheRepo(cloneDir, baseBranch); @@ -312,8 +310,8 @@ export async function stageResultsArtifacts(params: { await cp(params.sourceDir, params.destinationDir, { recursive: true }); } -export function resolveResultsRepoRunsDir(config: ResultsExportConfig): string { - const normalized = normalizeResultsExportConfig(config); +export function resolveResultsRepoRunsDir(config: ResultsConfig): string { + const normalized = normalizeResultsConfig(config); return path.join( getResultsRepoCachePaths(normalized.repo).repoDir, ...normalized.path.split('/'), @@ -354,11 +352,11 @@ export async function commitAndPushResultsBranch(params: { } export async function pushResultsRepoBranch( - config: ResultsExportConfig, + config: ResultsConfig, branchName: string, cwd?: string, ): Promise { - const normalized = normalizeResultsExportConfig(config); + const normalized = normalizeResultsConfig(config); await runGit(['push', '-u', 'origin', branchName], { cwd: cwd ?? getResultsRepoCachePaths(normalized.repo).repoDir, }); @@ -405,12 +403,12 @@ const DIRECT_PUSH_MAX_RETRIES = 3; * Returns true if artifacts were pushed, false if no changes were detected. */ export async function directPushResults(params: { - readonly config: ResultsExportConfig; + readonly config: ResultsConfig; readonly sourceDir: string; readonly destinationPath: string; readonly commitMessage: string; }): Promise { - const normalized = normalizeResultsExportConfig(params.config); + const normalized = normalizeResultsConfig(params.config); const repoDir = await ensureResultsRepoClone(normalized); const baseBranch = await resolveDefaultBranch(repoDir); await updateCacheRepo(repoDir, baseBranch); diff --git a/packages/core/src/evaluation/validation/config-validator.ts b/packages/core/src/evaluation/validation/config-validator.ts index a6f5d1af..5196feaf 100644 --- a/packages/core/src/evaluation/validation/config-validator.ts +++ b/packages/core/src/evaluation/validation/config-validator.ts @@ -77,61 +77,42 @@ export async function validateConfigFile(filePath: string): Promise).export; - if (exportConfig !== undefined) { - if ( - typeof exportConfig !== 'object' || - exportConfig === null || - Array.isArray(exportConfig) - ) { - errors.push({ - severity: 'error', - filePath, - location: 'results.export', - message: "Field 'results.export' must be an object", - }); - } else { - const exportRecord = exportConfig as Record; - if (typeof exportRecord.repo !== 'string' || exportRecord.repo.trim().length === 0) { - errors.push({ - severity: 'error', - filePath, - location: 'results.export.repo', - message: "Field 'results.export.repo' must be a non-empty string", - }); - } - if (typeof exportRecord.path !== 'string' || exportRecord.path.trim().length === 0) { - errors.push({ - severity: 'error', - filePath, - location: 'results.export.path', - message: "Field 'results.export.path' must be a non-empty string", - }); - } - if ( - exportRecord.auto_push !== undefined && - typeof exportRecord.auto_push !== 'boolean' - ) { - errors.push({ - severity: 'error', - filePath, - location: 'results.export.auto_push', - message: "Field 'results.export.auto_push' must be a boolean", - }); - } - if ( - exportRecord.branch_prefix !== undefined && - (typeof exportRecord.branch_prefix !== 'string' || - exportRecord.branch_prefix.trim().length === 0) - ) { - errors.push({ - severity: 'error', - filePath, - location: 'results.export.branch_prefix', - message: "Field 'results.export.branch_prefix' must be a non-empty string", - }); - } - } + const resultsRecord = results as Record; + if (typeof resultsRecord.repo !== 'string' || resultsRecord.repo.trim().length === 0) { + errors.push({ + severity: 'error', + filePath, + location: 'results.repo', + message: "Field 'results.repo' must be a non-empty string", + }); + } + if (typeof resultsRecord.path !== 'string' || resultsRecord.path.trim().length === 0) { + errors.push({ + severity: 'error', + filePath, + location: 'results.path', + message: "Field 'results.path' must be a non-empty string", + }); + } + if (resultsRecord.auto_push !== undefined && typeof resultsRecord.auto_push !== 'boolean') { + errors.push({ + severity: 'error', + filePath, + location: 'results.auto_push', + message: "Field 'results.auto_push' must be a boolean", + }); + } + if ( + resultsRecord.branch_prefix !== undefined && + (typeof resultsRecord.branch_prefix !== 'string' || + resultsRecord.branch_prefix.trim().length === 0) + ) { + errors.push({ + severity: 'error', + filePath, + location: 'results.branch_prefix', + message: "Field 'results.branch_prefix' must be a non-empty string", + }); } } } diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index 870c0c5f..aab188c8 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -10,7 +10,7 @@ export { export { loadConfig, type AgentVConfig as AgentVYamlConfig, - type ResultsExportConfig, + type ResultsConfig, } from './evaluation/loaders/config-loader.js'; export { loadTsEvalFile, @@ -63,7 +63,7 @@ export { syncResultsRepo, getResultsRepoCachePaths, getResultsRepoStatus, - normalizeResultsExportConfig, + normalizeResultsConfig, resolveResultsRepoRunsDir, resolveResultsRepoUrl, prepareResultsRepoBranch, diff --git a/packages/core/test/evaluation/loaders/config-loader.test.ts b/packages/core/test/evaluation/loaders/config-loader.test.ts index 303bf3a2..3846b471 100644 --- a/packages/core/test/evaluation/loaders/config-loader.test.ts +++ b/packages/core/test/evaluation/loaders/config-loader.test.ts @@ -137,36 +137,42 @@ describe('extractTrialsConfig', () => { }); describe('parseResultsConfig', () => { - it('parses valid results.export config', () => { + it('parses valid results config', () => { const result = parseResultsConfig( { - export: { - repo: 'EntityProcess/agentv-evals', - path: 'autopilot-dev/runs', - auto_push: true, - branch_prefix: 'eval-results', - }, + repo: 'EntityProcess/agentv-evals', + path: 'autopilot-dev/runs', + auto_push: true, + branch_prefix: 'eval-results', }, '/tmp/.agentv/config.yaml', ); expect(result).toEqual({ - export: { - repo: 'EntityProcess/agentv-evals', + repo: 'EntityProcess/agentv-evals', + path: 'autopilot-dev/runs', + auto_push: true, + branch_prefix: 'eval-results', + }); + }); + + it('returns undefined when repo is empty', () => { + const result = parseResultsConfig( + { + repo: '', path: 'autopilot-dev/runs', - auto_push: true, - branch_prefix: 'eval-results', }, - }); + '/tmp/.agentv/config.yaml', + ); + + expect(result).toBeUndefined(); }); - it('returns undefined for invalid export config', () => { + it('returns undefined when repo is not a string', () => { const result = parseResultsConfig( { - export: { - repo: '', - path: 'autopilot-dev/runs', - }, + repo: 123, + path: 'autopilot-dev/runs', }, '/tmp/.agentv/config.yaml', ); @@ -629,42 +635,3 @@ describe('parseExecutionDefaults', () => { }); }); }); - -describe('parseResultsConfig', () => { - it('parses valid results export configuration', () => { - expect( - parseResultsConfig( - { - export: { - repo: 'EntityProcess/agentv-evals', - path: 'autopilot-dev/runs', - auto_push: true, - branch_prefix: 'eval-results', - }, - }, - '/test/.agentv/config.yaml', - ), - ).toEqual({ - export: { - repo: 'EntityProcess/agentv-evals', - path: 'autopilot-dev/runs', - auto_push: true, - branch_prefix: 'eval-results', - }, - }); - }); - - it('returns undefined for invalid results export configuration', () => { - expect( - parseResultsConfig( - { - export: { - repo: 123, - path: 'autopilot-dev/runs', - }, - }, - '/test/.agentv/config.yaml', - ), - ).toBeUndefined(); - }); -}); diff --git a/packages/core/test/evaluation/validation/config-validator.test.ts b/packages/core/test/evaluation/validation/config-validator.test.ts index 42679df5..f2adaeef 100644 --- a/packages/core/test/evaluation/validation/config-validator.test.ts +++ b/packages/core/test/evaluation/validation/config-validator.test.ts @@ -46,16 +46,15 @@ describe('validateConfigFile', () => { expect(result.errors).toHaveLength(0); }); - it('accepts results.export field without warnings', async () => { + it('accepts results field without warnings', async () => { const filePath = path.join(tempDir, 'config-results.yaml'); await writeFile( filePath, `results: - export: - repo: EntityProcess/agentv-evals - path: autopilot-dev/runs - auto_push: true - branch_prefix: eval-results + repo: EntityProcess/agentv-evals + path: autopilot-dev/runs + auto_push: true + branch_prefix: eval-results `, );