diff --git a/bench/package.json b/bench/package.json index b0e72a0..527b1b8 100644 --- a/bench/package.json +++ b/bench/package.json @@ -18,7 +18,7 @@ "dependencies": { "@tangle-network/agent-eval": "^0.83.0", "@tangle-network/agent-runtime": "file:..", - "@tangle-network/sandbox": "^0.4.0" + "@tangle-network/sandbox": "^0.4.3" }, "devDependencies": { "tsx": "^4.19.0", diff --git a/bench/pnpm-lock.yaml b/bench/pnpm-lock.yaml index 82bb57d..7d03d2d 100644 --- a/bench/pnpm-lock.yaml +++ b/bench/pnpm-lock.yaml @@ -10,13 +10,13 @@ importers: dependencies: '@tangle-network/agent-eval': specifier: ^0.83.0 - version: 0.83.0(@tangle-network/sandbox@0.4.2(viem@2.52.0(typescript@5.9.3)(zod@4.4.3)))(typescript@5.9.3) + version: 0.83.0(@tangle-network/sandbox@0.4.3(viem@2.52.0(typescript@5.9.3)(zod@4.4.3)))(typescript@5.9.3) '@tangle-network/agent-runtime': specifier: file:.. - version: file:..(@tangle-network/agent-eval@0.83.0(@tangle-network/sandbox@0.4.2(viem@2.52.0(typescript@5.9.3)(zod@4.4.3)))(typescript@5.9.3))(@tangle-network/sandbox@0.4.2(viem@2.52.0(typescript@5.9.3)(zod@4.4.3))) + version: file:..(@tangle-network/agent-eval@0.83.0(@tangle-network/sandbox@0.4.3(viem@2.52.0(typescript@5.9.3)(zod@4.4.3)))(typescript@5.9.3))(@tangle-network/sandbox@0.4.3(viem@2.52.0(typescript@5.9.3)(zod@4.4.3))) '@tangle-network/sandbox': - specifier: ^0.4.0 - version: 0.4.2(viem@2.52.0(typescript@5.9.3)(zod@4.4.3)) + specifier: ^0.4.3 + version: 0.4.3(viem@2.52.0(typescript@5.9.3)(zod@4.4.3)) devDependencies: tsx: specifier: ^4.19.0 @@ -268,7 +268,7 @@ packages: engines: {node: '>=20'} hasBin: true peerDependencies: - '@tangle-network/agent-eval': '>=0.76.0 <1.0.0' + '@tangle-network/agent-eval': '>=0.83.0 <1.0.0' '@tangle-network/agent-knowledge': '>=1.3.0 <2.0.0' '@tangle-network/sandbox': '>=0.1.2 <0.5.0' playwright: ^1.40.0 @@ -300,8 +300,8 @@ packages: viem: optional: true - '@tangle-network/sandbox@0.4.2': - resolution: {integrity: sha512-+S9Wnrdvc1pD1cVuu6GBVT9UE+p7FVmjCj9JWU06zvZu6bdcpf4ehMUaxoTgY9/3NlykCIlaWO4mrH89/k3/5Q==} + '@tangle-network/sandbox@0.4.3': + resolution: {integrity: sha512-6QE3Nuhkd8f+OlpRJbumHTAG4wKR+ESXT47UE0fjTf7ndRWLnhE4RZ7YRtHVo/Q9ZZr0FGH1mwM+6tW0NAT1bA==} peerDependencies: '@mastra/core': ^1.36.0 '@modelcontextprotocol/sdk': ^1.29.0 @@ -558,7 +558,7 @@ snapshots: '@noble/hashes': 2.2.0 '@scure/base': 2.2.0 - '@tangle-network/agent-eval@0.83.0(@tangle-network/sandbox@0.4.2(viem@2.52.0(typescript@5.9.3)(zod@4.4.3)))(typescript@5.9.3)': + '@tangle-network/agent-eval@0.83.0(@tangle-network/sandbox@0.4.3(viem@2.52.0(typescript@5.9.3)(zod@4.4.3)))(typescript@5.9.3)': dependencies: '@asteasolutions/zod-to-openapi': 8.5.0(zod@4.4.3) '@ax-llm/ax': 19.0.45(zod@4.4.3) @@ -567,7 +567,7 @@ snapshots: hono: 4.12.23 zod: 4.4.3 optionalDependencies: - '@tangle-network/sandbox': 0.4.2(viem@2.52.0(typescript@5.9.3)(zod@4.4.3)) + '@tangle-network/sandbox': 0.4.3(viem@2.52.0(typescript@5.9.3)(zod@4.4.3)) transitivePeerDependencies: - '@mastra/core' - '@modelcontextprotocol/sdk' @@ -579,11 +579,11 @@ snapshots: '@tangle-network/agent-integrations@0.29.0': {} - '@tangle-network/agent-runtime@file:..(@tangle-network/agent-eval@0.83.0(@tangle-network/sandbox@0.4.2(viem@2.52.0(typescript@5.9.3)(zod@4.4.3)))(typescript@5.9.3))(@tangle-network/sandbox@0.4.2(viem@2.52.0(typescript@5.9.3)(zod@4.4.3)))': + '@tangle-network/agent-runtime@file:..(@tangle-network/agent-eval@0.83.0(@tangle-network/sandbox@0.4.3(viem@2.52.0(typescript@5.9.3)(zod@4.4.3)))(typescript@5.9.3))(@tangle-network/sandbox@0.4.3(viem@2.52.0(typescript@5.9.3)(zod@4.4.3)))': dependencies: - '@tangle-network/agent-eval': 0.83.0(@tangle-network/sandbox@0.4.2(viem@2.52.0(typescript@5.9.3)(zod@4.4.3)))(typescript@5.9.3) + '@tangle-network/agent-eval': 0.83.0(@tangle-network/sandbox@0.4.3(viem@2.52.0(typescript@5.9.3)(zod@4.4.3)))(typescript@5.9.3) optionalDependencies: - '@tangle-network/sandbox': 0.4.2(viem@2.52.0(typescript@5.9.3)(zod@4.4.3)) + '@tangle-network/sandbox': 0.4.3(viem@2.52.0(typescript@5.9.3)(zod@4.4.3)) '@tangle-network/sandbox@0.3.0(viem@2.52.0(typescript@5.9.3)(zod@4.4.3))': dependencies: @@ -591,7 +591,7 @@ snapshots: optionalDependencies: viem: 2.52.0(typescript@5.9.3)(zod@4.4.3) - '@tangle-network/sandbox@0.4.2(viem@2.52.0(typescript@5.9.3)(zod@4.4.3))': + '@tangle-network/sandbox@0.4.3(viem@2.52.0(typescript@5.9.3)(zod@4.4.3))': optionalDependencies: viem: 2.52.0(typescript@5.9.3)(zod@4.4.3) diff --git a/bench/src/experiment.ts b/bench/src/experiment.ts index bc0db33..b645955 100644 --- a/bench/src/experiment.ts +++ b/bench/src/experiment.ts @@ -311,6 +311,10 @@ export async function runExperiment(cfg: ExperimentConfig): Promise { if (signal.aborted) throwAbort() - await box.dispatchPrompt(prompt, { sessionId, signal }) - const result = await box.session(sessionId).result() + // dispatchPrompt returns the session id the platform actually assigned, which + // may be one it MINTED rather than the supplied `sessionId`. Polling the + // supplied id when the platform minted a different one 404s the session-events + // endpoint ("Resource not found"). Always follow the assigned id. + const dispatched = await box.dispatchPrompt(prompt, { sessionId, signal }) + const activeSessionId = dispatched.sessionId + const result = await box.session(activeSessionId).result() if (signal.aborted) throwAbort() yield { type: 'result', - id: sessionId, + id: activeSessionId, data: { finalText: result.response ?? '', success: result.success,