diff --git a/.changeset/fix-gemini-thought-signature-part-level.md b/.changeset/fix-gemini-thought-signature-part-level.md new file mode 100644 index 000000000..2b9924225 --- /dev/null +++ b/.changeset/fix-gemini-thought-signature-part-level.md @@ -0,0 +1,26 @@ +--- +'@tanstack/ai-gemini': patch +'@tanstack/ai': minor +'@tanstack/ai-event-client': minor +--- + +fix(ai-gemini): read/write thoughtSignature at Part level + thread typed metadata through tool-call lifecycle + +Two fixes shipped together because the adapter fix is only effective once the framework also preserves provider metadata across the tool-call round-trip. + +**Adapter (Gemini):** Gemini emits `thoughtSignature` as a Part-level sibling of `functionCall` (per the `@google/genai` `Part` type definition), not nested inside `functionCall`. The `FunctionCall` type has never had a `thoughtSignature` property. The adapter was reading from `functionCall.thoughtSignature` (does not exist in SDK types) and writing it back nested inside `functionCall`, causing Gemini 3.x to reject subsequent tool-call turns with `400 INVALID_ARGUMENT: "Function call is missing a thought_signature"`. + +- **Read side:** reads `part.thoughtSignature` directly using the SDK's typed `Part` interface +- **Write side:** emits `thoughtSignature` as a Part-level sibling of `functionCall` + +**Framework (typed tool-call metadata):** + +- `ToolCall.providerMetadata: Record<string, unknown>` is now `ToolCall.metadata?: TMetadata`, mirroring the existing typed-metadata pattern on content parts (`ImagePart`, `AudioPart`, etc.). +- `ToolCallPart` gains a typed `metadata?: TMetadata` field (also generic). +- `ToolCallStartEvent.providerMetadata` becomes `metadata` (kept as `Record<string, unknown>` because the AGUIEvent discriminated union does not survive a generic on the event type; adapters cast to their typed shape when emitting). 
+- `BaseTextAdapter` and `TextAdapter` gain a sixth generic `TToolCallMetadata` (default `unknown`), exposed via `~types.toolCallMetadata` for inference at call sites. +- `InternalToolCallState` gains a `metadata?: Record<string, unknown>` field captured at `TOOL_CALL_START` and threaded through `updateToolCallPart`, `buildAssistantMessages`, `modelMessageToUIMessage`, and `completeToolCall`, fixing a previously-silent drop of provider metadata across the client-side UIMessage pipeline (closes the gap surfaced in #403/#404). + +**Gemini concrete impl:** new `GeminiToolCallMetadata { thoughtSignature?: string }` exported from `@tanstack/ai-gemini`. The adapter declares its `TToolCallMetadata` as this type, so consumers see `toolCall.metadata?.thoughtSignature` typed end-to-end. + +**Breaking:** consumers reading `toolCall.providerMetadata` or `toolCallStartEvent.providerMetadata` should rename to `metadata`. diff --git a/packages/typescript/ai-event-client/src/index.ts b/packages/typescript/ai-event-client/src/index.ts index e934adf40..e0fea39ef 100644 --- a/packages/typescript/ai-event-client/src/index.ts +++ b/packages/typescript/ai-event-client/src/index.ts @@ -49,7 +49,7 @@ export interface DocumentPart { metadata?: unknown } -export interface ToolCallPart { +export interface ToolCallPart { type: 'tool-call' id: string name: string @@ -61,6 +61,9 @@ export interface ToolCallPart { approved?: boolean } output?: any + /** Provider-specific metadata that round-trips with the tool call. + * Mirrors `ToolCallPart.metadata` in `@tanstack/ai`. */ + metadata?: TMetadata } export interface ToolResultPart { @@ -86,15 +89,17 @@ export type MessagePart = | ToolResultPart | ThinkingPart -export interface ToolCall { +export interface ToolCall { id: string type: 'function' function: { name: string arguments: string } - /** Provider-specific metadata to carry through the tool call lifecycle */ - providerMetadata?: Record + /** Provider-specific metadata to carry through the tool call lifecycle. 
+ * Typed per-adapter via `TToolCallMetadata` (e.g. Gemini's + * `{ thoughtSignature?: string }`). */ + metadata?: TMetadata } /** diff --git a/packages/typescript/ai-gemini/src/adapters/text.ts b/packages/typescript/ai-gemini/src/adapters/text.ts index f4efcc466..ea744f456 100644 --- a/packages/typescript/ai-gemini/src/adapters/text.ts +++ b/packages/typescript/ai-gemini/src/adapters/text.ts @@ -33,7 +33,10 @@ import type { TextOptions, } from '@tanstack/ai' import type { ExternalTextProviderOptions } from '../text/text-provider-options' -import type { GeminiMessageMetadataByModality } from '../message-types' +import type { + GeminiMessageMetadataByModality, + GeminiToolCallMetadata, +} from '../message-types' import type { GeminiClientConfig } from '../utils' /** Cast an event object to StreamChunk. Adapters construct events with string @@ -104,7 +107,8 @@ export class GeminiTextAdapter< TProviderOptions, TInputModalities, GeminiMessageMetadataByModality, - TToolCapabilities + TToolCapabilities, + GeminiToolCallMetadata > { readonly kind = 'text' as const readonly name = 'gemini' as const @@ -385,6 +389,11 @@ export class GeminiTextAdapter< `${functionCall.name}_${Date.now()}_${nextToolIndex}` const functionArgs = functionCall.args || {} + // Gemini emits thoughtSignature as a Part-level sibling of + // functionCall (per @google/genai Part type), not nested inside + // functionCall itself. 
+ const partThoughtSignature = part.thoughtSignature || undefined + let toolCallData = toolCallMap.get(toolCallId) if (!toolCallData) { toolCallData = { @@ -395,11 +404,13 @@ export class GeminiTextAdapter< : JSON.stringify(functionArgs), index: nextToolIndex++, started: false, - thoughtSignature: - (functionCall as any).thoughtSignature || undefined, + thoughtSignature: partThoughtSignature, } toolCallMap.set(toolCallId, toolCallData) } else { + if (!toolCallData.thoughtSignature && partThoughtSignature) { + toolCallData.thoughtSignature = partThoughtSignature + } try { const existingArgs = JSON.parse(toolCallData.args) const newArgs = @@ -428,9 +439,9 @@ export class GeminiTextAdapter< timestamp, index: toolCallData.index, ...(toolCallData.thoughtSignature && { - providerMetadata: { + metadata: { thoughtSignature: toolCallData.thoughtSignature, - }, + } satisfies GeminiToolCallMetadata, }), }) } @@ -707,16 +718,24 @@ export class GeminiTextAdapter< > } - const thoughtSignature = toolCall.providerMetadata - ?.thoughtSignature as string | undefined - parts.push({ + const thoughtSignature = ( + toolCall.metadata as GeminiToolCallMetadata | undefined + )?.thoughtSignature + // Gemini requires thoughtSignature at the Part level (sibling of + // functionCall), not nested inside functionCall. Nesting it causes + // the API to reject the next turn with + // "Function call is missing a thought_signature". 
+ const part: Part = { functionCall: { id: toolCall.id, name: toolCall.function.name, args: parsedArgs, - ...(thoughtSignature && { thoughtSignature }), - } as any, - }) + }, + } + if (thoughtSignature) { + part.thoughtSignature = thoughtSignature + } + parts.push(part) } } diff --git a/packages/typescript/ai-gemini/src/message-types.ts b/packages/typescript/ai-gemini/src/message-types.ts index 629631ced..6195740dd 100644 --- a/packages/typescript/ai-gemini/src/message-types.ts +++ b/packages/typescript/ai-gemini/src/message-types.ts @@ -130,3 +130,17 @@ export interface GeminiMessageMetadataByModality { video: GeminiVideoMetadata document: GeminiDocumentMetadata } + +/** + * Provider-specific metadata that round-trips with each Gemini tool call. + * + * `thoughtSignature` is emitted by Gemini 3.x (and 2.5 thinking) models on + * the Part containing the `functionCall`. The same signature must be echoed + * back at the Part level on the next turn or the API rejects the request + * with `400 INVALID_ARGUMENT: "Function call is missing a thought_signature"`. 
+ * + * @see https://ai.google.dev/gemini-api/docs/thinking + */ +export interface GeminiToolCallMetadata { + thoughtSignature?: string +} diff --git a/packages/typescript/ai-gemini/tests/gemini-adapter.test.ts b/packages/typescript/ai-gemini/tests/gemini-adapter.test.ts index 9ad04e530..3da5d4c20 100644 --- a/packages/typescript/ai-gemini/tests/gemini-adapter.test.ts +++ b/packages/typescript/ai-gemini/tests/gemini-adapter.test.ts @@ -502,10 +502,11 @@ describe('GeminiAdapter through AI', () => { expect(textParts[0].text).toBe("what's a good electric guitar?") }) - it('preserves thoughtSignature in functionCall parts when sending history back to Gemini', async () => { + it('reads Part-level thoughtSignature from Gemini 3.x streaming response', async () => { const thoughtSig = 'base64-encoded-thought-signature-xyz' - // First stream: model returns a function call with a thoughtSignature (thinking model) + // Gemini 3.x emits thoughtSignature at the Part level, as a sibling of + // functionCall (per @google/genai Part type), not nested inside functionCall. 
const firstStream = [ { candidates: [ @@ -513,11 +514,11 @@ describe('GeminiAdapter through AI', () => { content: { parts: [ { + thoughtSignature: thoughtSig, functionCall: { id: 'fc_001', name: 'sum_tool', args: { numbers: [1, 2, 5] }, - thoughtSignature: thoughtSig, }, }, ], @@ -533,7 +534,6 @@ describe('GeminiAdapter through AI', () => { }, ] - // Second stream: model returns the final answer const secondStream = [ { candidates: [ @@ -587,8 +587,92 @@ describe('GeminiAdapter through AI', () => { const functionCallPart = modelTurn.parts.find((p: any) => p.functionCall) expect(functionCallPart).toBeDefined() expect(functionCallPart.functionCall.name).toBe('sum_tool') - // The thoughtSignature must be preserved in the model turn's functionCall - expect(functionCallPart.functionCall.thoughtSignature).toBe(thoughtSig) + // thoughtSignature must be at the Part level, NOT nested in functionCall + expect(functionCallPart.thoughtSignature).toBe(thoughtSig) + expect(functionCallPart.functionCall.thoughtSignature).toBeUndefined() + }) + + it('ignores thoughtSignature nested inside functionCall (not part of @google/genai Part type)', async () => { + // The @google/genai SDK has never typed thoughtSignature on FunctionCall; + // it only exists on Part. A nested value should be ignored. + const firstStream = [ + { + candidates: [ + { + content: { + parts: [ + { + functionCall: { + id: 'fc_nested', + name: 'sum_tool', + args: { numbers: [3, 4] }, + thoughtSignature: 'should-be-ignored', + }, + }, + ], + }, + finishReason: 'STOP', + }, + ], + usageMetadata: { + promptTokenCount: 10, + candidatesTokenCount: 5, + totalTokenCount: 15, + }, + }, + ] + + const secondStream = [ + { + candidates: [ + { + content: { parts: [{ text: 'The sum is 7.' 
}] }, + finishReason: 'STOP', + }, + ], + usageMetadata: { + promptTokenCount: 20, + candidatesTokenCount: 5, + totalTokenCount: 25, + }, + }, + ] + + mocks.generateContentStreamSpy + .mockResolvedValueOnce(createStream(firstStream)) + .mockResolvedValueOnce(createStream(secondStream)) + + const adapter = createTextAdapter() + + const sumTool: Tool = { + name: 'sum_tool', + description: 'Sums an array of numbers.', + execute: async (input: any) => ({ + result: input.numbers.reduce((a: number, b: number) => a + b, 0), + }), + } + + for await (const _ of chat({ + adapter, + tools: [sumTool], + messages: [{ role: 'user', content: 'What is 3 + 4?' }], + })) { + /* consume stream */ + } + + expect(mocks.generateContentStreamSpy).toHaveBeenCalledTimes(2) + + const [secondPayload] = mocks.generateContentStreamSpy.mock.calls[1] + const modelTurn = secondPayload.contents.find( + (c: any) => c.role === 'model', + ) + expect(modelTurn).toBeDefined() + + const functionCallPart = modelTurn.parts.find((p: any) => p.functionCall) + expect(functionCallPart).toBeDefined() + // No thoughtSignature should be emitted since none was at Part level + expect(functionCallPart.thoughtSignature).toBeUndefined() + expect(functionCallPart.functionCall.thoughtSignature).toBeUndefined() }) it('uses function name (not toolCallId) in functionResponse and preserves the call id', async () => { diff --git a/packages/typescript/ai/src/activities/chat/adapter.ts b/packages/typescript/ai/src/activities/chat/adapter.ts index 4ccc6fc09..4e7bb4ed1 100644 --- a/packages/typescript/ai/src/activities/chat/adapter.ts +++ b/packages/typescript/ai/src/activities/chat/adapter.ts @@ -54,6 +54,7 @@ export interface StructuredOutputResult { * - TInputModalities: Supported input modalities for this model (already resolved) * - TMessageMetadata: Metadata types for content parts (already resolved) * - TToolCapabilities: Tuple of tool-kind strings supported by this model, resolved from `supports.tools` + * - 
TToolCallMetadata: Metadata type that round-trips with tool calls (e.g. Gemini's `thoughtSignature`) */ export interface TextAdapter< TModel extends string, @@ -61,6 +62,7 @@ export interface TextAdapter< TInputModalities extends ReadonlyArray, TMessageMetadataByModality extends DefaultMessageMetadataByModality, TToolCapabilities extends ReadonlyArray = ReadonlyArray, + TToolCallMetadata = unknown, > { /** Discriminator for adapter kind */ readonly kind: 'text' @@ -77,6 +79,7 @@ export interface TextAdapter< inputModalities: TInputModalities messageMetadataByModality: TMessageMetadataByModality toolCapabilities: TToolCapabilities + toolCallMetadata: TToolCallMetadata } /** @@ -103,7 +106,7 @@ export interface TextAdapter< * A TextAdapter with any/unknown type parameters. * Useful as a constraint in generic functions and interfaces. */ -export type AnyTextAdapter = TextAdapter +export type AnyTextAdapter = TextAdapter /** * Abstract base class for text adapters. @@ -117,12 +120,14 @@ export abstract class BaseTextAdapter< TInputModalities extends ReadonlyArray, TMessageMetadataByModality extends DefaultMessageMetadataByModality, TToolCapabilities extends ReadonlyArray = ReadonlyArray, + TToolCallMetadata = unknown, > implements TextAdapter< TModel, TProviderOptions, TInputModalities, TMessageMetadataByModality, - TToolCapabilities + TToolCapabilities, + TToolCallMetadata > { readonly kind = 'text' as const abstract readonly name: string @@ -134,6 +139,7 @@ export abstract class BaseTextAdapter< inputModalities: TInputModalities messageMetadataByModality: TMessageMetadataByModality toolCapabilities: TToolCapabilities + toolCallMetadata: TToolCallMetadata } protected config: TextAdapterConfig diff --git a/packages/typescript/ai/src/activities/chat/messages.ts b/packages/typescript/ai/src/activities/chat/messages.ts index b7f97b880..0e7a683ca 100644 --- a/packages/typescript/ai/src/activities/chat/messages.ts +++ b/packages/typescript/ai/src/activities/chat/messages.ts 
@@ -138,6 +138,10 @@ interface AssistantSegment { id: string type: 'function' function: { name: string; arguments: string } + /** Provider-specific metadata that round-trips with the tool call. + * Untyped at this framework layer; adapters narrow it via their + * `TToolCallMetadata` generic. */ + metadata?: unknown }> } @@ -205,6 +209,7 @@ function buildAssistantMessages(uiMessage: UIMessage): Array { name: part.name, arguments: part.arguments, }, + ...(part.metadata !== undefined && { metadata: part.metadata }), }) } break @@ -340,6 +345,7 @@ export function modelMessageToUIMessage( name: toolCall.function.name, arguments: toolCall.function.arguments, state: 'input-complete', // Model messages have complete arguments + ...(toolCall.metadata !== undefined && { metadata: toolCall.metadata }), }) } } diff --git a/packages/typescript/ai/src/activities/chat/stream/message-updaters.ts b/packages/typescript/ai/src/activities/chat/stream/message-updaters.ts index 80b94d59a..a01465a36 100644 --- a/packages/typescript/ai/src/activities/chat/stream/message-updaters.ts +++ b/packages/typescript/ai/src/activities/chat/stream/message-updaters.ts @@ -55,6 +55,7 @@ export function updateToolCallPart( name: string arguments: string state: ToolCallState + metadata?: Record }, ): Array { return messages.map((msg) => { @@ -67,6 +68,12 @@ export function updateToolCallPart( (p): p is ToolCallPart => p.type === 'tool-call' && p.id === toolCall.id, ) + // Carry forward metadata from either the new toolCall or the existing + // part. Once the adapter has emitted metadata for a tool call (e.g. + // Gemini's thoughtSignature on TOOL_CALL_START) we must not lose it on + // subsequent updates that don't re-supply it. + const metadata = toolCall.metadata ?? 
existing?.metadata + const toolCallPart: ToolCallPart = { type: 'tool-call', id: toolCall.id, @@ -76,6 +83,7 @@ export function updateToolCallPart( // Carry forward approval and output from the existing part ...(existing?.approval && { approval: { ...existing.approval } }), ...(existing?.output !== undefined && { output: existing.output }), + ...(metadata !== undefined && { metadata }), } if (existing) { diff --git a/packages/typescript/ai/src/activities/chat/stream/processor.ts b/packages/typescript/ai/src/activities/chat/stream/processor.ts index def1194c9..ba04e2314 100644 --- a/packages/typescript/ai/src/activities/chat/stream/processor.ts +++ b/packages/typescript/ai/src/activities/chat/stream/processor.ts @@ -899,6 +899,11 @@ export class StreamProcessor { const toolName = chunk.toolCallName + // Capture provider metadata that arrived on TOOL_CALL_START so it + // round-trips back through the assistant message on the next turn + // (e.g. Gemini's thoughtSignature). + const chunkMetadata = chunk.metadata + const newToolCall: InternalToolCallState = { id: chunk.toolCallId, name: toolName, @@ -906,6 +911,7 @@ export class StreamProcessor { state: initialState, parsedArguments: undefined, index: chunk.index ?? state.toolCalls.size, + ...(chunkMetadata !== undefined && { metadata: chunkMetadata }), } state.toolCalls.set(toolCallId, newToolCall) @@ -920,6 +926,7 @@ export class StreamProcessor { name: toolName, arguments: '', state: initialState, + ...(chunkMetadata !== undefined && { metadata: chunkMetadata }), }) this.emitMessagesChange() @@ -1386,6 +1393,7 @@ export class StreamProcessor { name: toolCall.name, arguments: toolCall.arguments, state: 'input-complete', + ...(toolCall.metadata !== undefined && { metadata: toolCall.metadata }), }) this.emitMessagesChange() @@ -1501,6 +1509,10 @@ export class StreamProcessor { name: tc.name, arguments: tc.arguments, }, + // Preserve provider metadata (e.g. 
Gemini thoughtSignature) on + // ProcessorResult.toolCalls so callers using process()/getResult() + // get the same round-trip support as the streaming UI path. + ...(tc.metadata !== undefined && { metadata: tc.metadata }), }) } } diff --git a/packages/typescript/ai/src/activities/chat/stream/types.ts b/packages/typescript/ai/src/activities/chat/stream/types.ts index b91bb457a..5a7cd23d8 100644 --- a/packages/typescript/ai/src/activities/chat/stream/types.ts +++ b/packages/typescript/ai/src/activities/chat/stream/types.ts @@ -25,6 +25,11 @@ export interface InternalToolCallState { state: ToolCallState parsedArguments?: any index: number + /** Provider-specific metadata that round-trips with the tool call + * (e.g. Gemini's `thoughtSignature`). Untyped at this layer because + * the stream processor is provider-agnostic; adapters narrow it + * via their `TToolCallMetadata` generic. */ + metadata?: Record } /** diff --git a/packages/typescript/ai/src/activities/chat/tools/tool-calls.ts b/packages/typescript/ai/src/activities/chat/tools/tool-calls.ts index 5ed3d9cdf..5e9c5c2ac 100644 --- a/packages/typescript/ai/src/activities/chat/tools/tool-calls.ts +++ b/packages/typescript/ai/src/activities/chat/tools/tool-calls.ts @@ -101,9 +101,7 @@ export class ToolCallManager { name, arguments: '', }, - ...(event.providerMetadata && { - providerMetadata: event.providerMetadata, - }), + ...(event.metadata !== undefined && { metadata: event.metadata }), }) } diff --git a/packages/typescript/ai/src/types.ts b/packages/typescript/ai/src/types.ts index e11e7176f..978b94167 100644 --- a/packages/typescript/ai/src/types.ts +++ b/packages/typescript/ai/src/types.ts @@ -111,15 +111,17 @@ export type SchemaInput = StandardJSONSchemaV1 | JSONSchema export type InferSchemaType = T extends StandardJSONSchemaV1 ? 
TInput : unknown -export interface ToolCall { +export interface ToolCall { id: string type: 'function' function: { name: string arguments: string // JSON string } - /** Provider-specific metadata to carry through the tool call lifecycle */ - providerMetadata?: Record + /** Provider-specific metadata to carry through the tool call lifecycle. + * Typed per-adapter via `TToolCallMetadata`. For example, + * `@tanstack/ai-gemini` sets this to `{ thoughtSignature?: string }`. */ + metadata?: TMetadata } // ============================================================================ @@ -308,7 +310,7 @@ export interface TextPart { metadata?: TMetadata } -export interface ToolCallPart { +export interface ToolCallPart { type: 'tool-call' id: string name: string @@ -322,6 +324,9 @@ export interface ToolCallPart { } /** Tool execution output (for client tools or after approval) */ output?: any + /** Provider-specific metadata that round-trips with the tool call. + * Typed per-adapter via `TToolCallMetadata`. */ + metadata?: TMetadata } export interface ToolResultPart { @@ -889,7 +894,7 @@ export interface TextMessageEndEvent extends AGUITextMessageEndEvent { * Emitted when a tool call starts. * * @ag-ui/core provides: `toolCallId`, `toolCallName`, `parentMessageId?` - * TanStack AI adds: `model?`, `toolName` (deprecated alias), `index?`, `providerMetadata?` + * TanStack AI adds: `model?`, `toolName` (deprecated alias), `index?`, `metadata?` */ export interface ToolCallStartEvent extends AGUIToolCallStartEvent { /** Model identifier for multi-model support */ @@ -901,8 +906,11 @@ export interface ToolCallStartEvent extends AGUIToolCallStartEvent { toolName: string /** Index for parallel tool calls */ index?: number - /** Provider-specific metadata to carry into the ToolCall */ - providerMetadata?: Record + /** Provider-specific metadata to carry into the ToolCall. 
+ * Untyped at the event layer because events flow through a discriminated + * union that does not survive generics; adapters cast it to their typed + * `TToolCallMetadata` shape when emitting. */ + metadata?: Record } /** diff --git a/packages/typescript/ai/tests/strip-to-spec-middleware.test.ts b/packages/typescript/ai/tests/strip-to-spec-middleware.test.ts index 0099fad2d..beb4aa1fa 100644 --- a/packages/typescript/ai/tests/strip-to-spec-middleware.test.ts +++ b/packages/typescript/ai/tests/strip-to-spec-middleware.test.ts @@ -27,7 +27,7 @@ describe('stripToSpec', () => { toolCallName: 'getTodos', toolName: 'getTodos', index: 0, - providerMetadata: { foo: 'bar' }, + metadata: { foo: 'bar' }, model: 'gpt-4o', }) const result = stripToSpec(chunk) diff --git a/packages/typescript/ai/tests/test-utils.ts b/packages/typescript/ai/tests/test-utils.ts index 523480648..73b239b93 100644 --- a/packages/typescript/ai/tests/test-utils.ts +++ b/packages/typescript/ai/tests/test-utils.ts @@ -108,6 +108,7 @@ export function createMockAdapter(options: { document: undefined as unknown, }, toolCapabilities: [] as ReadonlyArray, + toolCallMetadata: undefined as unknown, }, chatStream: (opts: any) => { calls.push(opts) diff --git a/packages/typescript/ai/tests/type-check.test.ts b/packages/typescript/ai/tests/type-check.test.ts index acb064216..82d05e64d 100644 --- a/packages/typescript/ai/tests/type-check.test.ts +++ b/packages/typescript/ai/tests/type-check.test.ts @@ -36,6 +36,7 @@ const mockAdapter = { document: undefined as unknown, }, toolCapabilities: [] as ReadonlyArray, + toolCallMetadata: undefined as unknown, }, chatStream: async function* () {}, structuredOutput: async () => ({ data: {}, rawText: '{}' }),