AgoraIO · fern-api · May 20, 2026
diff --git a/.fern/replay.lock b/.fern/replay.lock
diff --git a/.fernignore b/.fernignore
@@ -14,3 +14,6 @@ docs/
 pyproject.toml
 poetry.lock
 requirements.txt
+.fern/replay.lock
+.fern/replay.yml
+.gitattributes
diff --git a/.gitattributes b/.gitattributes
@@ -0,0 +1 @@
+.fern/replay.lock linguist-generated=true
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -54,6 +54,7 @@ jobs:
       - name: Publish to pypi
         run: |
           poetry config repositories.remote https://upload.pypi.org/legacy/
-          poetry --no-interaction -v publish --build --repository remote --username "__token__" --password "$PYPI_API_TOKEN"
+          poetry --no-interaction -v publish --build --repository remote --username "$PYPI_USERNAME" --password "$PYPI_PASSWORD"
         env:
-          PYPI_API_TOKEN: ${{ secrets.PYPI_API_TOKEN }}
+          PYPI_USERNAME: ${{ secrets.PYPI_USERNAME }}
+          PYPI_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
diff --git a/README.md b/README.md
@@ -18,6 +18,7 @@ and multimodal flows (MLLM) for real-time audio processing.
 - [Documentation](#documentation)
 - [Reference](#reference)
 - [Mllm Flow Multimodal](#mllm-flow-multimodal)
+- [Mllm Flow Multimodal](#mllm-flow-multimodal)
 - [Usage](#usage)
 - [Async Client](#async-client)
 - [Exception Handling](#exception-handling)
@@ -278,6 +279,71 @@ client.agents.start(
 ```
 
 
+## MLLM Flow (Multimodal)
+
+For real-time audio processing using OpenAI's Realtime API or Google Gemini Live, use the MLLM (Multimodal Large Language Model) flow instead of the cascading ASR -> LLM -> TTS flow. See the [MLLM Overview](https://docs.agora.io/en/conversational-ai/models/mllm/overview) for more details.
+
+```python
+from agora_agent import Agora
+from agora_agent.agents import (
+    StartAgentsRequestProperties,
+    StartAgentsRequestPropertiesAdvancedFeatures,
+    StartAgentsRequestPropertiesMllm,
+    StartAgentsRequestPropertiesMllmVendor,
+    StartAgentsRequestPropertiesTts,
+    StartAgentsRequestPropertiesTtsVendor,
+    StartAgentsRequestPropertiesLlm,
+    StartAgentsRequestPropertiesTurnDetection,
+    StartAgentsRequestPropertiesTurnDetectionType,
+)
+
+client = Agora(
+    customer_id="YOUR_CUSTOMER_ID",
+    customer_secret="YOUR_CUSTOMER_SECRET",
+)
+
+client.agents.start(
+    appid="your_app_id",
+    name="mllm_agent",
+    properties=StartAgentsRequestProperties(
+        channel="channel_name",
+        token="your_token",
+        agent_rtc_uid="1001",
+        remote_rtc_uids=["1002"],
+        idle_timeout=120,
+        advanced_features=StartAgentsRequestPropertiesAdvancedFeatures(
+            enable_mllm=True,
+        ),
+        mllm=StartAgentsRequestPropertiesMllm(
+            url="wss://api.openai.com/v1/realtime",
+            api_key="<your_openai_api_key>",
+            vendor=StartAgentsRequestPropertiesMllmVendor.OPENAI,
+            params={
+                "model": "gpt-4o-realtime-preview",
+                "voice": "alloy",
+            },
+            input_modalities=["audio"],
+            output_modalities=["text", "audio"],
+            greeting_message="Hello! I'm ready to chat in real-time.",
+        ),
+        turn_detection=StartAgentsRequestPropertiesTurnDetection(
+            type=StartAgentsRequestPropertiesTurnDetectionType.SERVER_VAD,
+            threshold=0.5,
+            silence_duration_ms=500,
+        ),
+        # TTS and LLM are still required but not used when MLLM is enabled
+        tts=StartAgentsRequestPropertiesTts(
+            vendor=StartAgentsRequestPropertiesTtsVendor.MICROSOFT,
+            params={},
+        ),
+        llm=StartAgentsRequestPropertiesLlm(
+            url="https://api.openai.com/v1/chat/completions",
+        ),
+    ),
+)
+```
+
+
 ## Usage
 
 Instantiate and use the client with the following:
@@ -288,6 +354,9 @@ from agora_agent.agents import (
     StartAgentsRequestProperties,
     StartAgentsRequestPropertiesAsr,
     StartAgentsRequestPropertiesLlm,
+    StartAgentsRequestPropertiesTurnDetection,
+    StartAgentsRequestPropertiesTurnDetectionConfig,
+    StartAgentsRequestPropertiesTurnDetectionConfigEndOfSpeech,
 )
 
 client = Agora(
@@ -325,6 +394,13 @@ client.agents.start(
             greeting_message="Hello, how can I assist you today?",
             failure_message="Please hold on a second.",
         ),
+        turn_detection=StartAgentsRequestPropertiesTurnDetection(
+            config=StartAgentsRequestPropertiesTurnDetectionConfig(
+                end_of_speech=StartAgentsRequestPropertiesTurnDetectionConfigEndOfSpeech(
+                    mode="semantic",
+                ),
+            ),
+        ),
     ),
 )
 ```
@@ -341,6 +417,9 @@ from agora_agent.agents import (
     StartAgentsRequestProperties,
     StartAgentsRequestPropertiesAsr,
     StartAgentsRequestPropertiesLlm,
+    StartAgentsRequestPropertiesTurnDetection,
+    StartAgentsRequestPropertiesTurnDetectionConfig,
+    StartAgentsRequestPropertiesTurnDetectionConfigEndOfSpeech,
 )
 
 client = AsyncAgora(
@@ -381,6 +460,13 @@ async def main() -> None:
                 greeting_message="Hello, how can I assist you today?",
                 failure_message="Please hold on a second.",
             ),
+            turn_detection=StartAgentsRequestPropertiesTurnDetection(
+                config=StartAgentsRequestPropertiesTurnDetectionConfig(
+                    end_of_speech=StartAgentsRequestPropertiesTurnDetectionConfigEndOfSpeech(
+                        mode="semantic",
+                    ),
+                ),
+            ),
         ),
     )
 

diff --git a/reference.md b/reference.md
@@ -32,6 +32,9 @@ from agora_agent.agents import (
     StartAgentsRequestProperties,
     StartAgentsRequestPropertiesAsr,
     StartAgentsRequestPropertiesLlm,
+    StartAgentsRequestPropertiesTurnDetection,
+    StartAgentsRequestPropertiesTurnDetectionConfig,
+    StartAgentsRequestPropertiesTurnDetectionConfigEndOfSpeech,
 )
 
 client = Agora(
@@ -69,6 +72,13 @@ client.agents.start(
             greeting_message="Hello, how can I assist you today?",
             failure_message="Please hold on a second.",
         ),
+        turn_detection=StartAgentsRequestPropertiesTurnDetection(
+            config=StartAgentsRequestPropertiesTurnDetectionConfig(
+                end_of_speech=StartAgentsRequestPropertiesTurnDetectionConfigEndOfSpeech(
+                    mode="semantic",
+                ),
+            ),
+        ),
     ),
 )
 
@@ -242,7 +252,6 @@ The agent state to filter by. Only one state can be specified per query:
 - `RUNNING` (2): The agent is running.
 - `STOPPING` (3): The agent is stopping.
 - `STOPPED` (4): The agent has exited.
-- `RECOVERING` (5): The agent is recovering.
 - `FAILED` (6): The agent failed to execute.
 
 </dd>
@@ -516,6 +525,22 @@ client.agents.get_turns(
 <dl>
 <dd>
 
+**page_index:** `typing.Optional[int]` — The page number. Starts from 1.
+
+</dd>
+</dl>
+
+<dl>
+<dd>
+
+**page_size:** `typing.Optional[int]` — The number of dialogue turns returned per page.
+
+</dd>
+</dl>
+
+<dl>
+<dd>
+
 **request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration.
 
 </dd>
@@ -540,7 +565,7 @@ client.agents.get_turns(
 <dl>
 <dd>
 
-Stop the specified conversational agent instance.
+Stop the specified conversational agent instance. The API responds after request parameters are validated, and the stop operation is processed asynchronously after the response is returned.
 </dd>
 </dl>
 </dd>
@@ -1015,6 +1040,7 @@ client.agent_management.agent_think(
 
 The action to take when the agent is in a listening state:
 - `inject`: Inject the custom text instruction into the current turn without interrupting it.
+- `interrupt`: Immediately interrupt the current flow and initiate a new round of dialogue.
 - `ignore`: Ignore the request.
 
 </dd>

diff --git a/src/agora_agent/agent_management/client.py b/src/agora_agent/agent_management/client.py
@@ -72,6 +72,7 @@ def agent_think(
         on_listening_action : typing.Optional[AgentThinkAgentManagementRequestOnListeningAction]
             The action to take when the agent is in a listening state:
             - `inject`: Inject the custom text instruction into the current turn without interrupting it.
+            - `interrupt`: Immediately interrupt the current flow and initiate a new round of dialogue.
             - `ignore`: Ignore the request.
 
         on_thinking_action : typing.Optional[AgentThinkAgentManagementRequestOnThinkingAction]
@@ -186,6 +187,7 @@ async def agent_think(
         on_listening_action : typing.Optional[AgentThinkAgentManagementRequestOnListeningAction]
             The action to take when the agent is in a listening state:
             - `inject`: Inject the custom text instruction into the current turn without interrupting it.
+            - `interrupt`: Immediately interrupt the current flow and initiate a new round of dialogue.
             - `ignore`: Ignore the request.
 
         on_thinking_action : typing.Optional[AgentThinkAgentManagementRequestOnThinkingAction]

diff --git a/src/agora_agent/agent_management/raw_client.py b/src/agora_agent/agent_management/raw_client.py
@@ -65,6 +65,7 @@ def agent_think(
         on_listening_action : typing.Optional[AgentThinkAgentManagementRequestOnListeningAction]
             The action to take when the agent is in a listening state:
             - `inject`: Inject the custom text instruction into the current turn without interrupting it.
+            - `interrupt`: Immediately interrupt the current flow and initiate a new round of dialogue.
             - `ignore`: Ignore the request.
 
         on_thinking_action : typing.Optional[AgentThinkAgentManagementRequestOnThinkingAction]
@@ -167,6 +168,7 @@ async def agent_think(
         on_listening_action : typing.Optional[AgentThinkAgentManagementRequestOnListeningAction]
             The action to take when the agent is in a listening state:
             - `inject`: Inject the custom text instruction into the current turn without interrupting it.
+            - `interrupt`: Immediately interrupt the current flow and initiate a new round of dialogue.
             - `ignore`: Ignore the request.
 
         on_thinking_action : typing.Optional[AgentThinkAgentManagementRequestOnThinkingAction]

diff --git a/..._agent/agent_management/types/agent_think_agent_management_request_on_listening_action.py b/..._agent/agent_management/types/agent_think_agent_management_request_on_listening_action.py
@@ -2,4 +2,6 @@
 
 import typing
 
-AgentThinkAgentManagementRequestOnListeningAction = typing.Union[typing.Literal["inject", "ignore"], typing.Any]
+AgentThinkAgentManagementRequestOnListeningAction = typing.Union[
+    typing.Literal["inject", "interrupt", "ignore"], typing.Any
+]
diff --git a/src/agora_agent/agent_management/types/agent_think_agent_management_response.py b/src/agora_agent/agent_management/types/agent_think_agent_management_response.py
@@ -20,7 +20,7 @@ class AgentThinkAgentManagementResponse(UncheckedBaseModel):
 
     start_ts: typing.Optional[int] = pydantic.Field(default=None)
     """
-    Timestamp indicating when the agent was created.
+    Unix timestamp in seconds when the think request was processed.
     """
 
     if IS_PYDANTIC_V2:

diff --git a/src/agora_agent/agent_management/types/agent_think_request_on_listening_action.py b/src/agora_agent/agent_management/types/agent_think_request_on_listening_action.py
diff --git a/src/agora_agent/agent_management/types/agent_think_request_on_speaking_action.py b/src/agora_agent/agent_management/types/agent_think_request_on_speaking_action.py
diff --git a/src/agora_agent/agent_management/types/agent_think_request_on_thinking_action.py b/src/agora_agent/agent_management/types/agent_think_request_on_thinking_action.py