AssemblyAI · dlange-aai · Jun 4, 2026 · Jun 4, 2026
diff --git a/package.json b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "assemblyai",
-  "version": "4.34.0",
+  "version": "4.34.4",
   "description": "The AssemblyAI JavaScript SDK provides an easy-to-use interface for interacting with the AssemblyAI API, which supports async and real-time transcription, as well as the latest LeMUR models.",
   "engines": {
     "node": ">=18"

diff --git a/samples/streaming-dual-channel-mic-system/README.md b/samples/streaming-dual-channel-mic-system/README.md
@@ -109,6 +109,30 @@ between consecutive words. `channel` reliably reports the physical source
 (VAD-derived); `speaker_label` is AAI's acoustic diarization on the mixed
 mono stream. Either change is a real boundary.
 
+### Speaker revisions
+
+With `speakerLabels` enabled, the server may revise the diarization of
+_earlier_ turns once it has more context (offline reclustering). It sends a
+`SpeakerRevision` event — one message per resolve, listing only the turns whose
+labels actually changed:
+
+```ts
+transcriber.on("speakerRevision", (event) => {
+  for (const rev of event.revisions) {
+    // rev.turn_order: which earlier turn to correct
+    // rev.speaker_label: corrected turn-level label
+    // rev.words[i].speaker: corrected per-word labels
+  }
+})
+```
+
+The sample keeps each finalized turn in the DOM and, on a revision, re-renders
+the matching `turn_order` in place with the corrected labels — outlining the
+bubble in purple and appending `(revised)` to its rollup line. Revision words
+carry the new `speaker` but no client-side `channel`, so the sample merges
+speakers onto the original words by timestamp and leaves channel attribution
+untouched.
+
 ## Platform caveats
 
 - **macOS:** `getDisplayMedia({ audio: true })` does **not** capture system

diff --git a/samples/streaming-dual-channel-mic-system/index.html b/samples/streaming-dual-channel-mic-system/index.html
@@ -80,6 +80,9 @@
       #events .event-line.event-Termination {
         color: #fecaca;
       }
+      #events .event-line.event-SpeakerRevision {
+        color: #ddd6fe;
+      }
       .turn-container {
         display: flex;
         flex-direction: column;
@@ -146,6 +149,15 @@
         font-size: 0.75rem;
         padding: 0.15rem 0.5rem;
       }
+      .rollup-line.revised {
+        color: #6d28d9;
+        font-weight: bold;
+      }
+      /* A turn corrected by a later SpeakerRevision: outline its bubbles. */
+      .turn-container.revised .turn-row {
+        outline: 2px solid #8b5cf6;
+        outline-offset: 1px;
+      }
       .description {
         background: #fef9c3;
         border: 1px solid #facc15;

diff --git a/samples/streaming-dual-channel-mic-system/main.ts b/samples/streaming-dual-channel-mic-system/main.ts
@@ -219,6 +219,13 @@ async function start(): Promise<void> {
   // contiguous same-channel run within the turn) plus the rollup line on
   // end_of_turn. Keyed by turn_order so partials replace the same container.
   const turnContainers = new Map<number, HTMLElement>()
+  // Finalized turns we keep around so a later SpeakerRevision can re-render
+  // them in place. Keyed by turn_order; holds the container, its rollup line,
+  // and the last-rendered TurnEvent (so revisions merge onto known words).
+  const finalizedTurns = new Map<
+    number,
+    { container: HTMLElement; rollup: HTMLElement; turn: TurnEvent }
+  >()
   // Last committed turn-level (channel, speaker_label) composite — used to
   // detect speaker changes between consecutive *finalized turns*.
   let lastFinalTurnComposite: string | undefined
@@ -333,13 +340,51 @@ async function start(): Promise<void> {
     } | Most active channel: ${turn.channel ?? "unknown"}`
     container.after(rollupLine)
     output.scrollTop = output.scrollHeight
+
+    // Retain the finalized turn so a later SpeakerRevision can correct it.
+    finalizedTurns.set(turn.turn_order, { container, rollup: rollupLine, turn })
+  })
+
+  // SpeakerRevision: diarization-only, emitted once per offline-recluster
+  // resolve. Each revision is an *earlier* finalized turn whose speaker labels
+  // changed — we re-render that turn in place with the corrected labels and
+  // flag it as revised. Unchanged turns are not included.
+  transcriber.on("speakerRevision", (event) => {
+    logEvent("SpeakerRevision", event)
+    for (const rev of event.revisions) {
+      const entry = finalizedTurns.get(rev.turn_order)
+      if (!entry) continue // turn isn't in view (e.g. session was reset)
+
+      // Revision words carry the corrected `speaker` but no client-side
+      // `channel` attribution, so merge speakers onto the original words by
+      // timestamp and keep each word's channel/channelResolved intact.
+      const speakerByStart = new Map(rev.words.map((w) => [w.start, w.speaker]))
+      const revisedWords = entry.turn.words.map((w) => ({
+        ...w,
+        speaker: speakerByStart.get(w.start) ?? w.speaker,
+      }))
+      entry.turn = {
+        ...entry.turn,
+        speaker_label: rev.speaker_label ?? entry.turn.speaker_label,
+        words: revisedWords,
+      }
+
+      renderTurnContainer(entry.container, entry.turn, true)
+      entry.container.classList.add("revised")
+      entry.rollup.className = "rollup-line revised"
+      entry.rollup.textContent = `-- End of turn #${rev.turn_order}: speaker_label: ${
+        entry.turn.speaker_label ?? "?"
+      } | Most active channel: ${entry.turn.channel ?? "unknown"}  (revised)`
+    }
+    output.scrollTop = output.scrollHeight
   })
 
   // Expose so stop() can reset session-scoped state.
   ;(
     globalThis as unknown as { __resetTurnState?: () => void }
   ).__resetTurnState = () => {
     turnContainers.clear()
+    finalizedTurns.clear()
     lastFinalTurnComposite = undefined
   }
 

diff --git a/src/services/streaming/service.ts b/src/services/streaming/service.ts
@@ -15,6 +15,7 @@ import {
   StreamingEventMessage,
   TurnEvent,
   LLMGatewayResponseEvent,
+  SpeakerRevisionEvent,
   StreamingUpdateConfiguration,
   StreamingForceEndpoint,
   WarningEvent,
@@ -262,6 +263,10 @@ export class StreamingTranscriber {
       searchParams.set("prompt", this.params.prompt);
     }
 
+    if (this.params.agentContext) {
+      searchParams.set("agent_context", this.params.agentContext);
+    }
+
     if (this.params.filterProfanity) {
       searchParams.set(
         "filter_profanity",
@@ -274,7 +279,9 @@ export class StreamingTranscriber {
         "[Deprecation Warning] The speech model `u3-pro` is deprecated and will be removed in a future release. Please use `u3-rt-pro` instead.",
       );
     }
-    searchParams.set("speech_model", this.params.speechModel.toString());
+    if (this.params.speechModel !== undefined) {
+      searchParams.set("speech_model", this.params.speechModel.toString());
+    }
 
     if (this.params.languageDetection !== undefined) {
       searchParams.set(
@@ -389,6 +396,10 @@ export class StreamingTranscriber {
       searchParams.set("redact_pii_sub", this.params.redactPiiSub);
     }
 
+    if (this.params.mode !== undefined) {
+      searchParams.set("mode", this.params.mode);
+    }
+
     if (this.params.llmGateway !== undefined) {
       searchParams.set("llm_gateway", JSON.stringify(this.params.llmGateway));
     }
@@ -404,6 +415,10 @@ export class StreamingTranscriber {
     event: "llmGatewayResponse",
     listener: (event: LLMGatewayResponseEvent) => void,
   ): void;
+  on(
+    event: "speakerRevision",
+    listener: (event: SpeakerRevisionEvent) => void,
+  ): void;
   on(event: "warning", listener: (event: WarningEvent) => void): void;
   on(event: "vad", listener: (event: VadFrame) => void): void;
   on(event: "error", listener: (error: Error) => void): void;
@@ -508,6 +523,10 @@ Learn more at https://github.com/AssemblyAI/assemblyai-node-sdk/blob/main/docs/c
             this.listeners.llmGatewayResponse?.(message);
             break;
           }
+          case "SpeakerRevision": {
+            this.listeners.speakerRevision?.(message);
+            break;
+          }
           case "Warning": {
             const warning = message as WarningEvent;
             console.warn(

diff --git a/src/types/streaming/index.ts b/src/types/streaming/index.ts
@@ -91,7 +91,8 @@ export type StreamingTranscriberParams = {
   keyterms?: string[];
   keytermsPrompt?: string[];
   prompt?: string;
-  speechModel: StreamingSpeechModel;
+  agentContext?: string;
+  speechModel?: StreamingSpeechModel;
   languageDetection?: boolean;
   domain?: StreamingDomain;
   inactivityTimeout?: number;
@@ -107,6 +108,7 @@ export type StreamingTranscriberParams = {
   redactPii?: boolean;
   redactPiiPolicies?: StreamingPiiPolicy[];
   redactPiiSub?: StreamingPiiSubstitution;
+  mode?: StreamingMode;
   llmGateway?: LLMGatewayConfig;
   webhookUrl?: string;
   webhookAuthHeaderName?: string;
@@ -146,6 +148,7 @@ export type StreamingEvents =
   | "turn"
   | "speechStarted"
   | "llmGatewayResponse"
+  | "speakerRevision"
   | "warning"
   | "vad"
   | "error";
@@ -156,6 +159,7 @@ export type StreamingListeners = {
   turn?: (event: TurnEvent) => void;
   speechStarted?: (event: SpeechStartedEvent) => void;
   llmGatewayResponse?: (event: LLMGatewayResponseEvent) => void;
+  speakerRevision?: (event: SpeakerRevisionEvent) => void;
   warning?: (event: WarningEvent) => void;
   vad?: (event: VadFrame) => void;
   error?: (error: Error) => void;
@@ -165,11 +169,14 @@ export type StreamingSpeechModel =
   | "universal-streaming-english"
   | "universal-streaming-multilingual"
   | "u3-rt-pro"
+  | "u3-rt-pro-beta-1"
   | "whisper-rt"
   | "u3-pro";
 
 export type StreamingDomain = "medical-v1";
 
+export type StreamingMode = "max_accuracy" | "min_latency" | "balanced";
+
 export type VoiceFocusModel = "near-field" | "far-field";
 
 export type StreamingPiiSubstitution = "hash" | "entity_name";
@@ -330,6 +337,7 @@ export type StreamingUpdateConfiguration = {
   format_turns?: boolean;
   keyterms_prompt?: string[];
   prompt?: string;
+  agent_context?: string;
   filter_profanity?: boolean;
   interruption_delay?: number;
   turn_left_pad_ms?: number;
@@ -358,12 +366,37 @@ export type LLMGatewayResponseEvent = {
   data: unknown;
 };
 
+/**
+ * A single earlier Turn whose speaker labels were revised by reclustering.
+ * Match by `turn_order` against the original Turn; replace its per-word
+ * `speaker` assignments (and the turn-level `speaker_label`) with these. Text
+ * and word timestamps are unchanged from the original Turn.
+ */
+export type SpeakerRevisionItem = {
+  turn_order: number;
+  speaker_label?: string;
+  words: StreamingWord[];
+};
+
+/**
+ * Server-side correction to previously-emitted Turns' speaker labels.
+ * Diarization-only (emitted only when `speakerLabels` is enabled). Sent once
+ * per offline-recluster resolve; `revisions` carries one entry per earlier
+ * Turn whose label actually changed (unchanged turns are omitted). Apply each
+ * entry by matching its `turn_order`.
+ */
+export type SpeakerRevisionEvent = {
+  type: "SpeakerRevision";
+  revisions: SpeakerRevisionItem[];
+};
+
 export type StreamingEventMessage =
   | BeginEvent
   | TurnEvent
   | SpeechStartedEvent
   | TerminationEvent
   | LLMGatewayResponseEvent
+  | SpeakerRevisionEvent
   | ErrorEvent
   | WarningEvent;