Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/huge-lizards-admire.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
'@tanstack/ai-gemini': minor
---

Added Gemini Realtime Adapter
4 changes: 4 additions & 0 deletions examples/ts-react-chat/.env.example
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@
# Get yours at: https://platform.openai.com/api-keys
OPENAI_API_KEY=sk-...

# Gemini API Key
# Get yours at: https://aistudio.google.com/api-keys
GEMINI_API_KEY=...

# ElevenLabs API Key (for realtime voice)
# Get yours at: https://elevenlabs.io/app/settings/api-keys
ELEVENLABS_API_KEY=xi-...
Expand Down
29 changes: 22 additions & 7 deletions examples/ts-react-chat/src/lib/use-realtime.ts
Original file line number Diff line number Diff line change
@@ -1,17 +1,20 @@
import { createServerFn } from '@tanstack/react-start'
import { realtimeToken } from '@tanstack/ai'
import { useRealtimeChat } from '@tanstack/ai-react'
import { openaiRealtime, openaiRealtimeToken } from '@tanstack/ai-openai'
import {
elevenlabsRealtime,
elevenlabsRealtimeToken,
} from '@tanstack/ai-elevenlabs'
import { geminiRealtime, geminiRealtimeToken } from '@tanstack/ai-gemini'
import { openaiRealtime, openaiRealtimeToken } from '@tanstack/ai-openai'
import { useRealtimeChat } from '@tanstack/ai-react'
import { createServerFn } from '@tanstack/react-start'
import type { OpenAIRealtimeVoice } from "@tanstack/ai-openai"
import type { GeminiRealtimeVoice } from "@tanstack/ai-gemini"
import { realtimeClientTools } from '@/lib/realtime-tools'

type Provider = 'openai' | 'elevenlabs'
type Provider = 'openai' | 'elevenlabs' | 'gemini'

const getRealtimeTokenFn = createServerFn({ method: 'POST' })
.inputValidator((data: { provider: Provider; agentId?: string }) => {
.inputValidator((data: { provider?: Provider; agentId?: string }) => {
if (!data.provider) throw new Error('Provider is required')
return data
})
Expand All @@ -24,6 +27,12 @@ const getRealtimeTokenFn = createServerFn({ method: 'POST' })
})
}

if (data.provider === 'gemini') {
return realtimeToken({
adapter: geminiRealtimeToken(),
})
}

if (data.provider === 'elevenlabs') {
const agentId = data.agentId || process.env.ELEVENLABS_AGENT_ID
if (!agentId) {
Expand All @@ -46,16 +55,22 @@ export function useRealtime({
temperature,
maxOutputTokens,
semanticEagerness,
voice
}: {
provider: Provider
agentId: string
outputModalities?: Array<'audio' | 'text'>
temperature?: number
maxOutputTokens?: number | 'inf'
semanticEagerness?: 'low' | 'medium' | 'high'
voice?: OpenAIRealtimeVoice | GeminiRealtimeVoice
}) {
const adapter =
provider === 'openai' ? openaiRealtime() : elevenlabsRealtime()
provider === 'openai'
? openaiRealtime()
: provider === 'gemini'
? geminiRealtime()
: elevenlabsRealtime()
Comment thread
nikas-belogolov marked this conversation as resolved.

return useRealtimeChat({
getToken: () =>
Expand All @@ -78,7 +93,7 @@ Keep your responses concise and conversational since this is a voice interface.
When using tools, briefly explain what you're doing and then share the results naturally.
If the user sends an image, describe what you see and answer any questions about it.
Be friendly and engaging!`,
voice: 'alloy',
// Prefer the caller-supplied voice; otherwise use the provider default.
// The parentheses are required: `||` binds tighter than `?:`, so without them
// any truthy `voice` would select 'Puck' instead of being passed through.
voice: voice || (provider === 'gemini' ? 'Puck' : 'alloy'),
tools: realtimeClientTools,
outputModalities,
temperature,
Expand Down
7 changes: 4 additions & 3 deletions examples/ts-react-chat/src/routes/realtime.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,11 @@ import {
import { AudioSparkline } from '@/components/AudioSparkline'
import { useRealtime } from '@/lib/use-realtime'

type Provider = 'openai' | 'elevenlabs'
type Provider = 'openai' | 'elevenlabs' | 'gemini'
type OutputMode = 'audio+text' | 'text-only' | 'audio-only'

const PROVIDER_OPTIONS: Array<{ value: Provider; label: string }> = [
{ value: 'gemini', label: 'Google Gemini' },
{ value: 'openai', label: 'OpenAI Realtime' },
{ value: 'elevenlabs', label: 'ElevenLabs' },
]
Expand All @@ -43,7 +44,7 @@ function outputModeToModalities(
}

function RealtimePage() {
const [provider, setProvider] = useState<Provider>('openai')
const [provider, setProvider] = useState<Provider>('gemini')
const [agentId, setAgentId] = useState('')
const [textInput, setTextInput] = useState('')
const [outputMode, setOutputMode] = useState<OutputMode>('audio+text')
Expand Down Expand Up @@ -275,7 +276,7 @@ function RealtimePage() {
</div>

{/* Tools indicator */}
{provider === 'openai' && (
{(provider === 'openai' || provider === 'gemini') && (
<div className="border-b border-orange-500/10 bg-gray-800/50 px-4 py-2">
<div className="flex items-center gap-2 text-xs text-gray-400">
<Wrench className="w-3 h-3" />
Expand Down
56 changes: 50 additions & 6 deletions packages/typescript/ai-client/src/realtime-client.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import type {
AudioVisualization,
RealtimeMessage,
RealtimeMode,
RealtimeSessionConfig,
RealtimeStatus,
RealtimeToken,
} from '@tanstack/ai'
Expand Down Expand Up @@ -100,6 +101,7 @@ export class RealtimeClient {
this.clientTools.size > 0
? Array.from(this.clientTools.values())
: undefined

this.connection = await this.options.adapter.connect(
this.token,
toolsList,
Expand Down Expand Up @@ -281,6 +283,35 @@ export class RealtimeClient {
return this.connection?.getAudioVisualization() ?? null
}

/**
 * Merge a partial session configuration into this client.
 *
 * Every recognised session field that is present on `config` is copied onto
 * the client's stored options, so the stored options reflect the latest
 * values. When a connection currently exists, the session configuration is
 * then re-applied through `applySessionConfig()`.
 *
 * @param config - Subset of session settings to overwrite. A field counts as
 *   present when the key exists on the object (`in` check), even if its value
 *   is `undefined`.
 */
updateSession(config: Partial<RealtimeSessionConfig>): void {
  // Session fields that are mirrored onto the stored client options.
  const persistedFields: ReadonlyArray<keyof RealtimeSessionConfig> = [
    'instructions',
    'voice',
    'vadMode',
    'tools',
    'outputModalities',
    'temperature',
    'maxOutputTokens',
    'semanticEagerness',
    'modelOptions',
  ]

  // The options object and the session config are typed independently, so the
  // field-by-field copy goes through loosely typed views of both.
  const storedOptions = this.options as any
  const incoming = config as any

  persistedFields.forEach((field) => {
    if (field in config) {
      storedOptions[field] = incoming[field]
    }
  })

  // Nothing to push to when no connection exists.
  if (!this.connection) {
    return
  }

  this.applySessionConfig()
}

// ============================================================================
// State Subscription
// ============================================================================
Expand Down Expand Up @@ -352,6 +383,7 @@ export class RealtimeClient {
try {
this.token = await this.options.getToken()
this.scheduleTokenRefresh()
this.connection?.updateToken?.(this.token)
// Note: Some providers may require reconnection with new token
// This is handled by the adapter implementation
} catch (error) {
Expand Down Expand Up @@ -472,6 +504,18 @@ export class RealtimeClient {
this.options.onError?.(error)
}),
)

this.unsubscribers.push(
this.connection.on('go_away', ({ timeLeft }) => {
this.options.onGoAway?.(timeLeft)
}),
)

this.unsubscribers.push(
this.connection.on('usage', (usage) => {
this.options.onUsage?.(usage)
}),
)
}

private applySessionConfig(): void {
Expand Down Expand Up @@ -500,12 +544,12 @@ export class RealtimeClient {

const toolsConfig = tools
? Array.from(this.clientTools.values()).map((t) => ({
name: t.name,
description: t.description,
inputSchema: t.inputSchema
? convertSchemaToJsonSchema(t.inputSchema)
: undefined,
}))
name: t.name,
description: t.description,
inputSchema: t.inputSchema
? convertSchemaToJsonSchema(t.inputSchema)
: undefined,
}))
: undefined

this.connection.updateSession({
Expand Down
12 changes: 11 additions & 1 deletion packages/typescript/ai-client/src/realtime-types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import type {
RealtimeSessionConfig,
RealtimeStatus,
RealtimeToken,
UsageInfo,
} from '@tanstack/ai'

// ============================================================================
Expand All @@ -25,7 +26,7 @@ export interface RealtimeAdapter {
/**
* Create a connection using the provided token
* @param token - The ephemeral token from the server
* @param clientTools - Optional client-side tools to register with the provider
* @param config - Initial session configuration (voice, instructions, etc.)
* @returns A connection instance
*/
connect: (
Expand Down Expand Up @@ -64,6 +65,8 @@ export interface RealtimeConnection {
// Session management
/** Update session configuration */
updateSession: (config: Partial<RealtimeSessionConfig>) => void
/** Hand a refreshed token to the active connection. Optional: adapters may omit it. */
updateToken?: (token: RealtimeToken) => void
/** Interrupt the current response */
interrupt: () => void

Expand Down Expand Up @@ -148,6 +151,11 @@ export interface RealtimeClientOptions {
*/
semanticEagerness?: 'low' | 'medium' | 'high'

/**
* Provider-specific options
*/
modelOptions?: Record<string, unknown>

// Callbacks
onStatusChange?: (status: RealtimeStatus) => void
onModeChange?: (mode: RealtimeMode) => void
Expand All @@ -156,6 +164,8 @@ export interface RealtimeClientOptions {
onConnect?: () => void
onDisconnect?: () => void
onInterrupted?: () => void
onUsage?: (usage: UsageInfo) => void
onGoAway?: (timeLeft?: string) => void
}

// ============================================================================
Expand Down
35 changes: 4 additions & 31 deletions packages/typescript/ai-elevenlabs/src/realtime/adapter.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
import { Conversation } from '@11labs/client'
import { createRealtimeEventEmitter } from '@tanstack/ai'
import type {
AnyClientTool,
AudioVisualization,
RealtimeEvent,
RealtimeEventHandler,
RealtimeMessage,
RealtimeMode,
RealtimeSessionConfig,
Expand Down Expand Up @@ -42,7 +41,7 @@ export function elevenlabsRealtime(
token: RealtimeToken,
clientToolDefs?: ReadonlyArray<AnyClientTool>,
): Promise<RealtimeConnection> {
return createElevenLabsConnection(token, options, clientToolDefs)
return createElevenLabsConnection(token, clientToolDefs)
},
}
}
Expand All @@ -52,10 +51,9 @@ export function elevenlabsRealtime(
*/
async function createElevenLabsConnection(
token: RealtimeToken,
_options: ElevenLabsRealtimeOptions,
clientToolDefs?: ReadonlyArray<AnyClientTool>,
): Promise<RealtimeConnection> {
const eventHandlers = new Map<RealtimeEvent, Set<RealtimeEventHandler<any>>>()
const { emit, on: realtimeEventEmitterOn } = createRealtimeEventEmitter()
let conversation: Awaited<
ReturnType<typeof Conversation.startSession>
> | null = null
Expand All @@ -65,19 +63,6 @@ async function createElevenLabsConnection(
const emptyFrequencyData = new Uint8Array(128)
const emptyTimeDomainData = new Uint8Array(128).fill(128)

// Helper to emit events
function emit<TEvent extends RealtimeEvent>(
event: TEvent,
payload: Parameters<RealtimeEventHandler<TEvent>>[0],
) {
const handlers = eventHandlers.get(event)
if (handlers) {
for (const handler of handlers) {
handler(payload)
}
}
}

function generateMessageId(): string {
return `el-msg-${Date.now()}-${++messageIdCounter}`
}
Expand Down Expand Up @@ -223,19 +208,7 @@ async function createElevenLabsConnection(
emit('interrupted', {})
},

on<TEvent extends RealtimeEvent>(
event: TEvent,
handler: RealtimeEventHandler<TEvent>,
): () => void {
if (!eventHandlers.has(event)) {
eventHandlers.set(event, new Set())
}
eventHandlers.get(event)!.add(handler)

return () => {
eventHandlers.get(event)?.delete(handler)
}
},
on: realtimeEventEmitterOn,

getAudioVisualization(): AudioVisualization {
return {
Expand Down
2 changes: 1 addition & 1 deletion packages/typescript/ai-elevenlabs/src/realtime/token.ts
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ export function elevenlabsRealtimeToken(
config: {
voice: overrides?.voiceId,
instructions: overrides?.systemPrompt,
providerOptions: {
modelOptions: {
agentId,
firstMessage: overrides?.firstMessage,
language: overrides?.language,
Expand Down
12 changes: 7 additions & 5 deletions packages/typescript/ai-gemini/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -39,15 +39,17 @@
"tanstack",
"adapter"
],
"dependencies": {
"@google/genai": "^1.43.0"
},
"peerDependencies": {
"@tanstack/ai": "workspace:^"
"@tanstack/ai": "workspace:^",
"@tanstack/ai-client": "workspace:^"
},
"devDependencies": {
"@tanstack/ai": "workspace:*",
"@tanstack/ai-client": "workspace:*",
"@vitest/coverage-v8": "4.0.14",
"vite": "^7.2.7"
"vite": "^7.3.1"
},
"dependencies": {
"@google/genai": "^1.50.1"
}
}
13 changes: 13 additions & 0 deletions packages/typescript/ai-gemini/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -82,3 +82,16 @@ export type {
GeminiDocumentMetadata,
GeminiMessageMetadataByModality,
} from './message-types'

// ============================================================================
// Realtime (Voice) Adapters
// ============================================================================

export { geminiRealtime, geminiRealtimeToken } from './realtime/index'

export type {
GeminiRealtimeModel,
GeminiRealtimeOptions,
GeminiRealtimeTokenOptions,
GeminiRealtimeVoice,
} from './realtime/index'
Loading