From 47e82cb5f5032bd8ccca1a861c779e0a7fc47689 Mon Sep 17 00:00:00 2001 From: fern-api <115122769+fern-api[bot]@users.noreply.github.com> Date: Wed, 20 May 2026 20:38:04 +0000 Subject: [PATCH] [fern-generated] Update SDK Generated by Fern CLI Version: unknown Generators: - fernapi/fern-python-sdk: 4.37.0 --- .fern/replay.lock | 10 + .fernignore | 3 + .gitattributes | 1 + .github/workflows/ci.yml | 5 +- .github/workflows/release.yml | 51 ---- README.md | 86 ++++++ reference.md | 30 +- src/agora_agent/agent_management/client.py | 2 + .../agent_management/raw_client.py | 2 + ..._management_request_on_listening_action.py | 4 +- .../agent_think_agent_management_response.py | 2 +- ...agent_think_request_on_listening_action.py | 5 - .../agent_think_request_on_speaking_action.py | 5 - .../agent_think_request_on_thinking_action.py | 5 - src/agora_agent/agents/client.py | 62 +++- src/agora_agent/agents/raw_client.py | 42 ++- .../agents/types/get_agents_response.py | 1 - .../types/get_agents_response_status.py | 2 +- .../agents/types/get_turns_agents_response.py | 26 ++ .../get_turns_agents_response_pagination.py} | 18 +- ...agents_response_turns_item_end_metadata.py | 4 +- ..._item_metrics_segmented_latency_ms_item.py | 12 +- .../agents/types/interrupt_agents_response.py | 2 +- ...t_agents_response_data_list_item_status.py | 2 +- ...ts_request_properties_advanced_features.py | 2 +- .../start_agents_request_properties_avatar.py | 1 + ...agents_request_properties_avatar_vendor.py | 2 +- ...ties_filler_words_content_static_config.py | 2 +- .../start_agents_request_properties_llm.py | 2 +- ...request_properties_llm_greeting_configs.py | 6 + .../start_agents_request_properties_mllm.py | 13 +- ..._request_properties_mllm_turn_detection.py | 2 +- ...s_mllm_turn_detection_server_vad_config.py | 2 +- ...t_agents_request_properties_mllm_vendor.py | 2 +- .../agents/types/start_agents_response.py | 1 - .../types/start_agents_response_status.py | 2 +- 
.../agents/types/update_agents_response.py | 1 - .../types/update_agents_response_status.py | 2 +- src/agora_agent/core/client_wrapper.py | 4 +- src/agora_agent/types/agent_error_response.py | 46 +++ .../types/agent_error_response_reason.py | 24 ++ src/agora_agent/types/open_ai_tts_params.py | 2 +- .../types/telephony_error_response.py | 34 +++ tests/custom/test_client.py | 268 +----------------- tests/custom/test_presets.py | 135 --------- 45 files changed, 407 insertions(+), 528 deletions(-) create mode 100644 .fern/replay.lock create mode 100644 .gitattributes delete mode 100644 .github/workflows/release.yml delete mode 100644 src/agora_agent/agent_management/types/agent_think_request_on_listening_action.py delete mode 100644 src/agora_agent/agent_management/types/agent_think_request_on_speaking_action.py delete mode 100644 src/agora_agent/agent_management/types/agent_think_request_on_thinking_action.py rename src/agora_agent/{agent_management/types/agent_think_response.py => agents/types/get_turns_agents_response_pagination.py} (56%) create mode 100644 src/agora_agent/types/agent_error_response.py create mode 100644 src/agora_agent/types/agent_error_response_reason.py create mode 100644 src/agora_agent/types/telephony_error_response.py delete mode 100644 tests/custom/test_presets.py diff --git a/.fern/replay.lock b/.fern/replay.lock new file mode 100644 index 0000000..536b6e4 --- /dev/null +++ b/.fern/replay.lock @@ -0,0 +1,10 @@ +# DO NOT EDIT MANUALLY - Managed by Fern Replay +version: "1.0" +generations: + - commit_sha: a217c8ecfd919345831eebaca8295e292d65ebcf + tree_hash: 707f496ae7e028b80fc9a2adc1e5d69468f170b3 + timestamp: 2026-05-20T20:38:02.180Z + cli_version: unknown + generator_versions: {} +current_generation: a217c8ecfd919345831eebaca8295e292d65ebcf +patches: [] diff --git a/.fernignore b/.fernignore index d85561f..1d34ba0 100644 --- a/.fernignore +++ b/.fernignore @@ -14,3 +14,6 @@ docs/ pyproject.toml poetry.lock requirements.txt 
+.fern/replay.lock +.fern/replay.yml +.gitattributes diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..74928d6 --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +.fern/replay.lock linguist-generated=true diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d7dc922..f46ffcf 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -54,6 +54,7 @@ jobs: - name: Publish to pypi run: | poetry config repositories.remote https://upload.pypi.org/legacy/ - poetry --no-interaction -v publish --build --repository remote --username "__token__" --password "$PYPI_API_TOKEN" + poetry --no-interaction -v publish --build --repository remote --username "$PYPI_USERNAME" --password "$PYPI_PASSWORD" env: - PYPI_API_TOKEN: ${{ secrets.PYPI_API_TOKEN }} + PYPI_USERNAME: ${{ secrets.PYPI_USERNAME }} + PYPI_PASSWORD: ${{ secrets.PYPI_PASSWORD }} diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml deleted file mode 100644 index 2c4b0e7..0000000 --- a/.github/workflows/release.yml +++ /dev/null @@ -1,51 +0,0 @@ -name: release - -on: - push: - tags: - - "v*" - workflow_dispatch: - inputs: - tag: - description: "Existing tag to release, for example v1.4.0" - required: true - type: string - -permissions: - contents: write - -jobs: - github-release: - runs-on: ubuntu-latest - env: - RELEASE_TAG: ${{ github.event_name == 'workflow_dispatch' && inputs.tag || github.ref_name }} - steps: - - name: Checkout repo - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - - name: Extract release notes - run: | - TAG="${RELEASE_TAG}" - git rev-parse "$TAG" >/dev/null - awk -v tag="$TAG" ' - index($0, "## [" tag "]") == 1 { found = 1; next } - found && /^## \[/ { exit } - found { print } - ' changelog.md > release_notes.md - - if [ ! 
-s release_notes.md ]; then - echo "Release $TAG" > release_notes.md - fi - - - name: Create or update GitHub release - env: - GH_TOKEN: ${{ github.token }} - run: | - TAG="${RELEASE_TAG}" - if gh release view "$TAG" >/dev/null 2>&1; then - gh release edit "$TAG" --title "$TAG" --notes-file release_notes.md - else - gh release create "$TAG" --title "$TAG" --notes-file release_notes.md - fi diff --git a/README.md b/README.md index f0ecfe5..f83f96b 100644 --- a/README.md +++ b/README.md @@ -18,6 +18,7 @@ and multimodal flows (MLLM) for real-time audio processing. - [Documentation](#documentation) - [Reference](#reference) - [Mllm Flow Multimodal](#mllm-flow-multimodal) +- [Mllm Flow Multimodal](#mllm-flow-multimodal) - [Usage](#usage) - [Async Client](#async-client) - [Exception Handling](#exception-handling) @@ -278,6 +279,71 @@ client.agents.start( ``` +## MLLM Flow (Multimodal) + +For real-time audio processing using OpenAI's Realtime API or Google Gemini Live, use the MLLM (Multimodal Large Language Model) flow instead of the cascading ASR -> LLM -> TTS flow. See the [MLLM Overview](https://docs.agora.io/en/conversational-ai/models/mllm/overview) for more details. 
+ +```python +from agora_agent import Agora +from agora_agent.agents import ( + StartAgentsRequestProperties, + StartAgentsRequestPropertiesAdvancedFeatures, + StartAgentsRequestPropertiesMllm, + StartAgentsRequestPropertiesMllmVendor, + StartAgentsRequestPropertiesTts, + StartAgentsRequestPropertiesTtsVendor, + StartAgentsRequestPropertiesLlm, + StartAgentsRequestPropertiesTurnDetection, + StartAgentsRequestPropertiesTurnDetectionType, +) + +client = Agora( + customer_id="YOUR_CUSTOMER_ID", + customer_secret="YOUR_CUSTOMER_SECRET", +) + +client.agents.start( + appid="your_app_id", + name="mllm_agent", + properties=StartAgentsRequestProperties( + channel="channel_name", + token="your_token", + agent_rtc_uid="1001", + remote_rtc_uids=["1002"], + idle_timeout=120, + advanced_features=StartAgentsRequestPropertiesAdvancedFeatures( + enable_mllm=True, + ), + mllm=StartAgentsRequestPropertiesMllm( + url="wss://api.openai.com/v1/realtime", + api_key="", + vendor=StartAgentsRequestPropertiesMllmVendor.OPENAI, + params={ + "model": "gpt-4o-realtime-preview", + "voice": "alloy", + }, + input_modalities=["audio"], + output_modalities=["text", "audio"], + greeting_message="Hello! 
I'm ready to chat in real-time.", + ), + turn_detection=StartAgentsRequestPropertiesTurnDetection( + type=StartAgentsRequestPropertiesTurnDetectionType.SERVER_VAD, + threshold=0.5, + silence_duration_ms=500, + ), + # TTS and LLM are still required but not used when MLLM is enabled + tts=StartAgentsRequestPropertiesTts( + vendor=StartAgentsRequestPropertiesTtsVendor.MICROSOFT, + params={}, + ), + llm=StartAgentsRequestPropertiesLlm( + url="https://api.openai.com/v1/chat/completions", + ), + ), +) +``` + + ## Usage Instantiate and use the client with the following: @@ -288,6 +354,9 @@ from agora_agent.agents import ( StartAgentsRequestProperties, StartAgentsRequestPropertiesAsr, StartAgentsRequestPropertiesLlm, + StartAgentsRequestPropertiesTurnDetection, + StartAgentsRequestPropertiesTurnDetectionConfig, + StartAgentsRequestPropertiesTurnDetectionConfigEndOfSpeech, ) client = Agora( @@ -325,6 +394,13 @@ client.agents.start( greeting_message="Hello, how can I assist you today?", failure_message="Please hold on a second.", ), + turn_detection=StartAgentsRequestPropertiesTurnDetection( + config=StartAgentsRequestPropertiesTurnDetectionConfig( + end_of_speech=StartAgentsRequestPropertiesTurnDetectionConfigEndOfSpeech( + mode="semantic", + ), + ), + ), ), ) ``` @@ -341,6 +417,9 @@ from agora_agent.agents import ( StartAgentsRequestProperties, StartAgentsRequestPropertiesAsr, StartAgentsRequestPropertiesLlm, + StartAgentsRequestPropertiesTurnDetection, + StartAgentsRequestPropertiesTurnDetectionConfig, + StartAgentsRequestPropertiesTurnDetectionConfigEndOfSpeech, ) client = AsyncAgora( @@ -381,6 +460,13 @@ async def main() -> None: greeting_message="Hello, how can I assist you today?", failure_message="Please hold on a second.", ), + turn_detection=StartAgentsRequestPropertiesTurnDetection( + config=StartAgentsRequestPropertiesTurnDetectionConfig( + end_of_speech=StartAgentsRequestPropertiesTurnDetectionConfigEndOfSpeech( + mode="semantic", + ), + ), + ), ), ) diff --git 
a/reference.md b/reference.md index 356b016..55a516e 100644 --- a/reference.md +++ b/reference.md @@ -32,6 +32,9 @@ from agora_agent.agents import ( StartAgentsRequestProperties, StartAgentsRequestPropertiesAsr, StartAgentsRequestPropertiesLlm, + StartAgentsRequestPropertiesTurnDetection, + StartAgentsRequestPropertiesTurnDetectionConfig, + StartAgentsRequestPropertiesTurnDetectionConfigEndOfSpeech, ) client = Agora( @@ -69,6 +72,13 @@ client.agents.start( greeting_message="Hello, how can I assist you today?", failure_message="Please hold on a second.", ), + turn_detection=StartAgentsRequestPropertiesTurnDetection( + config=StartAgentsRequestPropertiesTurnDetectionConfig( + end_of_speech=StartAgentsRequestPropertiesTurnDetectionConfigEndOfSpeech( + mode="semantic", + ), + ), + ), ), ) @@ -242,7 +252,6 @@ The agent state to filter by. Only one state can be specified per query: - `RUNNING` (2): The agent is running. - `STOPPING` (3): The agent is stopping. - `STOPPED` (4): The agent has exited. -- `RECOVERING` (5): The agent is recovering. - `FAILED` (6): The agent failed to execute. @@ -516,6 +525,22 @@ client.agents.get_turns(
+**page_index:** `typing.Optional[int]` — The page number. Starts from 1. + +
+
+ +
+
+ +**page_size:** `typing.Optional[int]` — The number of dialogue turns returned per page. + +
+
+ +
+
+ **request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration.
@@ -540,7 +565,7 @@ client.agents.get_turns(
-Stop the specified conversational agent instance. +Stop the specified conversational agent instance. The API responds after request parameters are validated, and the stop operation is processed asynchronously after the response is returned.
@@ -1015,6 +1040,7 @@ client.agent_management.agent_think( The action to take when the agent is in a listening state: - `inject`: Inject the custom text instruction into the current turn without interrupting it. +- `interrupt`: Immediately interrupt the current flow and initiate a new round of dialogue. - `ignore`: Ignore the request. diff --git a/src/agora_agent/agent_management/client.py b/src/agora_agent/agent_management/client.py index 71b3c62..6140649 100644 --- a/src/agora_agent/agent_management/client.py +++ b/src/agora_agent/agent_management/client.py @@ -72,6 +72,7 @@ def agent_think( on_listening_action : typing.Optional[AgentThinkAgentManagementRequestOnListeningAction] The action to take when the agent is in a listening state: - `inject`: Inject the custom text instruction into the current turn without interrupting it. + - `interrupt`: Immediately interrupt the current flow and initiate a new round of dialogue. - `ignore`: Ignore the request. on_thinking_action : typing.Optional[AgentThinkAgentManagementRequestOnThinkingAction] @@ -186,6 +187,7 @@ async def agent_think( on_listening_action : typing.Optional[AgentThinkAgentManagementRequestOnListeningAction] The action to take when the agent is in a listening state: - `inject`: Inject the custom text instruction into the current turn without interrupting it. + - `interrupt`: Immediately interrupt the current flow and initiate a new round of dialogue. - `ignore`: Ignore the request. 
on_thinking_action : typing.Optional[AgentThinkAgentManagementRequestOnThinkingAction] diff --git a/src/agora_agent/agent_management/raw_client.py b/src/agora_agent/agent_management/raw_client.py index 03a0838..4af36fe 100644 --- a/src/agora_agent/agent_management/raw_client.py +++ b/src/agora_agent/agent_management/raw_client.py @@ -65,6 +65,7 @@ def agent_think( on_listening_action : typing.Optional[AgentThinkAgentManagementRequestOnListeningAction] The action to take when the agent is in a listening state: - `inject`: Inject the custom text instruction into the current turn without interrupting it. + - `interrupt`: Immediately interrupt the current flow and initiate a new round of dialogue. - `ignore`: Ignore the request. on_thinking_action : typing.Optional[AgentThinkAgentManagementRequestOnThinkingAction] @@ -167,6 +168,7 @@ async def agent_think( on_listening_action : typing.Optional[AgentThinkAgentManagementRequestOnListeningAction] The action to take when the agent is in a listening state: - `inject`: Inject the custom text instruction into the current turn without interrupting it. + - `interrupt`: Immediately interrupt the current flow and initiate a new round of dialogue. - `ignore`: Ignore the request. 
on_thinking_action : typing.Optional[AgentThinkAgentManagementRequestOnThinkingAction] diff --git a/src/agora_agent/agent_management/types/agent_think_agent_management_request_on_listening_action.py b/src/agora_agent/agent_management/types/agent_think_agent_management_request_on_listening_action.py index 117d8cc..3812a6e 100644 --- a/src/agora_agent/agent_management/types/agent_think_agent_management_request_on_listening_action.py +++ b/src/agora_agent/agent_management/types/agent_think_agent_management_request_on_listening_action.py @@ -2,4 +2,6 @@ import typing -AgentThinkAgentManagementRequestOnListeningAction = typing.Union[typing.Literal["inject", "ignore"], typing.Any] +AgentThinkAgentManagementRequestOnListeningAction = typing.Union[ + typing.Literal["inject", "interrupt", "ignore"], typing.Any +] diff --git a/src/agora_agent/agent_management/types/agent_think_agent_management_response.py b/src/agora_agent/agent_management/types/agent_think_agent_management_response.py index 7e512eb..0c1f689 100644 --- a/src/agora_agent/agent_management/types/agent_think_agent_management_response.py +++ b/src/agora_agent/agent_management/types/agent_think_agent_management_response.py @@ -20,7 +20,7 @@ class AgentThinkAgentManagementResponse(UncheckedBaseModel): start_ts: typing.Optional[int] = pydantic.Field(default=None) """ - Timestamp indicating when the agent was created. + Unix timestamp in seconds when the think request was processed. """ if IS_PYDANTIC_V2: diff --git a/src/agora_agent/agent_management/types/agent_think_request_on_listening_action.py b/src/agora_agent/agent_management/types/agent_think_request_on_listening_action.py deleted file mode 100644 index 54cca4c..0000000 --- a/src/agora_agent/agent_management/types/agent_think_request_on_listening_action.py +++ /dev/null @@ -1,5 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. 
- -import typing - -AgentThinkRequestOnListeningAction = typing.Union[typing.Literal["inject", "ignore"], typing.Any] diff --git a/src/agora_agent/agent_management/types/agent_think_request_on_speaking_action.py b/src/agora_agent/agent_management/types/agent_think_request_on_speaking_action.py deleted file mode 100644 index 8329197..0000000 --- a/src/agora_agent/agent_management/types/agent_think_request_on_speaking_action.py +++ /dev/null @@ -1,5 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. - -import typing - -AgentThinkRequestOnSpeakingAction = typing.Union[typing.Literal["interrupt", "ignore"], typing.Any] diff --git a/src/agora_agent/agent_management/types/agent_think_request_on_thinking_action.py b/src/agora_agent/agent_management/types/agent_think_request_on_thinking_action.py deleted file mode 100644 index ee50877..0000000 --- a/src/agora_agent/agent_management/types/agent_think_request_on_thinking_action.py +++ /dev/null @@ -1,5 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. 
- -import typing - -AgentThinkRequestOnThinkingAction = typing.Union[typing.Literal["interrupt", "ignore"], typing.Any] diff --git a/src/agora_agent/agents/client.py b/src/agora_agent/agents/client.py index 6c4a48c..3f6af4c 100644 --- a/src/agora_agent/agents/client.py +++ b/src/agora_agent/agents/client.py @@ -89,6 +89,9 @@ def start( StartAgentsRequestProperties, StartAgentsRequestPropertiesAsr, StartAgentsRequestPropertiesLlm, + StartAgentsRequestPropertiesTurnDetection, + StartAgentsRequestPropertiesTurnDetectionConfig, + StartAgentsRequestPropertiesTurnDetectionConfigEndOfSpeech, ) client = Agora( @@ -126,6 +129,13 @@ def start( greeting_message="Hello, how can I assist you today?", failure_message="Please hold on a second.", ), + turn_detection=StartAgentsRequestPropertiesTurnDetection( + config=StartAgentsRequestPropertiesTurnDetectionConfig( + end_of_speech=StartAgentsRequestPropertiesTurnDetectionConfigEndOfSpeech( + mode="semantic", + ), + ), + ), ), ) """ @@ -175,7 +185,6 @@ def list( - `RUNNING` (2): The agent is running. - `STOPPING` (3): The agent is stopping. - `STOPPED` (4): The agent has exited. - - `RECOVERING` (5): The agent is recovering. - `FAILED` (6): The agent failed to execute. limit : typing.Optional[int] @@ -302,7 +311,13 @@ def get_history( return _response.data def get_turns( - self, appid: str, agent_id: str, *, request_options: typing.Optional[RequestOptions] = None + self, + appid: str, + agent_id: str, + *, + page_index: typing.Optional[int] = None, + page_size: typing.Optional[int] = None, + request_options: typing.Optional[RequestOptions] = None, ) -> GetTurnsAgentsResponse: """ Query conversation turn information for a conversational AI agent session. @@ -319,6 +334,12 @@ def get_turns( agent_id : str The agent instance ID you obtained after successfully calling `join` to start a conversational AI agent. + page_index : typing.Optional[int] + The page number. Starts from 1. 
+ + page_size : typing.Optional[int] + The number of dialogue turns returned per page. + request_options : typing.Optional[RequestOptions] Request-specific configuration. @@ -341,12 +362,14 @@ def get_turns( agent_id="agentId", ) """ - _response = self._raw_client.get_turns(appid, agent_id, request_options=request_options) + _response = self._raw_client.get_turns( + appid, agent_id, page_index=page_index, page_size=page_size, request_options=request_options + ) return _response.data def stop(self, appid: str, agent_id: str, *, request_options: typing.Optional[RequestOptions] = None) -> None: """ - Stop the specified conversational agent instance. + Stop the specified conversational agent instance. The API responds after request parameters are validated, and the stop operation is processed asynchronously after the response is returned. Parameters ---------- @@ -623,6 +646,9 @@ async def start( StartAgentsRequestProperties, StartAgentsRequestPropertiesAsr, StartAgentsRequestPropertiesLlm, + StartAgentsRequestPropertiesTurnDetection, + StartAgentsRequestPropertiesTurnDetectionConfig, + StartAgentsRequestPropertiesTurnDetectionConfigEndOfSpeech, ) client = AsyncAgora( @@ -663,6 +689,13 @@ async def main() -> None: greeting_message="Hello, how can I assist you today?", failure_message="Please hold on a second.", ), + turn_detection=StartAgentsRequestPropertiesTurnDetection( + config=StartAgentsRequestPropertiesTurnDetectionConfig( + end_of_speech=StartAgentsRequestPropertiesTurnDetectionConfigEndOfSpeech( + mode="semantic", + ), + ), + ), ), ) @@ -715,7 +748,6 @@ async def list( - `RUNNING` (2): The agent is running. - `STOPPING` (3): The agent is stopping. - `STOPPED` (4): The agent has exited. - - `RECOVERING` (5): The agent is recovering. - `FAILED` (6): The agent failed to execute. 
limit : typing.Optional[int] @@ -867,7 +899,13 @@ async def main() -> None: return _response.data async def get_turns( - self, appid: str, agent_id: str, *, request_options: typing.Optional[RequestOptions] = None + self, + appid: str, + agent_id: str, + *, + page_index: typing.Optional[int] = None, + page_size: typing.Optional[int] = None, + request_options: typing.Optional[RequestOptions] = None, ) -> GetTurnsAgentsResponse: """ Query conversation turn information for a conversational AI agent session. @@ -884,6 +922,12 @@ async def get_turns( agent_id : str The agent instance ID you obtained after successfully calling `join` to start a conversational AI agent. + page_index : typing.Optional[int] + The page number. Starts from 1. + + page_size : typing.Optional[int] + The number of dialogue turns returned per page. + request_options : typing.Optional[RequestOptions] Request-specific configuration. @@ -914,12 +958,14 @@ async def main() -> None: asyncio.run(main()) """ - _response = await self._raw_client.get_turns(appid, agent_id, request_options=request_options) + _response = await self._raw_client.get_turns( + appid, agent_id, page_index=page_index, page_size=page_size, request_options=request_options + ) return _response.data async def stop(self, appid: str, agent_id: str, *, request_options: typing.Optional[RequestOptions] = None) -> None: """ - Stop the specified conversational agent instance. + Stop the specified conversational agent instance. The API responds after request parameters are validated, and the stop operation is processed asynchronously after the response is returned. Parameters ---------- diff --git a/src/agora_agent/agents/raw_client.py b/src/agora_agent/agents/raw_client.py index c6eb03f..c0dfa01 100644 --- a/src/agora_agent/agents/raw_client.py +++ b/src/agora_agent/agents/raw_client.py @@ -144,7 +144,6 @@ def list( - `RUNNING` (2): The agent is running. - `STOPPING` (3): The agent is stopping. - `STOPPED` (4): The agent has exited. 
- - `RECOVERING` (5): The agent is recovering. - `FAILED` (6): The agent failed to execute. limit : typing.Optional[int] @@ -293,7 +292,13 @@ def get_history( raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json) def get_turns( - self, appid: str, agent_id: str, *, request_options: typing.Optional[RequestOptions] = None + self, + appid: str, + agent_id: str, + *, + page_index: typing.Optional[int] = None, + page_size: typing.Optional[int] = None, + request_options: typing.Optional[RequestOptions] = None, ) -> HttpResponse[GetTurnsAgentsResponse]: """ Query conversation turn information for a conversational AI agent session. @@ -310,6 +315,12 @@ def get_turns( agent_id : str The agent instance ID you obtained after successfully calling `join` to start a conversational AI agent. + page_index : typing.Optional[int] + The page number. Starts from 1. + + page_size : typing.Optional[int] + The number of dialogue turns returned per page. + request_options : typing.Optional[RequestOptions] Request-specific configuration. @@ -321,6 +332,10 @@ def get_turns( _response = self._client_wrapper.httpx_client.request( f"v2/projects/{jsonable_encoder(appid)}/agents/{jsonable_encoder(agent_id)}/turns", method="GET", + params={ + "page_index": page_index, + "page_size": page_size, + }, request_options=request_options, ) try: @@ -342,7 +357,7 @@ def stop( self, appid: str, agent_id: str, *, request_options: typing.Optional[RequestOptions] = None ) -> HttpResponse[None]: """ - Stop the specified conversational agent instance. + Stop the specified conversational agent instance. The API responds after request parameters are validated, and the stop operation is processed asynchronously after the response is returned. Parameters ---------- @@ -670,7 +685,6 @@ async def list( - `RUNNING` (2): The agent is running. - `STOPPING` (3): The agent is stopping. - `STOPPED` (4): The agent has exited. - - `RECOVERING` (5): The agent is recovering. 
- `FAILED` (6): The agent failed to execute. limit : typing.Optional[int] @@ -822,7 +836,13 @@ async def get_history( raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json) async def get_turns( - self, appid: str, agent_id: str, *, request_options: typing.Optional[RequestOptions] = None + self, + appid: str, + agent_id: str, + *, + page_index: typing.Optional[int] = None, + page_size: typing.Optional[int] = None, + request_options: typing.Optional[RequestOptions] = None, ) -> AsyncHttpResponse[GetTurnsAgentsResponse]: """ Query conversation turn information for a conversational AI agent session. @@ -839,6 +859,12 @@ async def get_turns( agent_id : str The agent instance ID you obtained after successfully calling `join` to start a conversational AI agent. + page_index : typing.Optional[int] + The page number. Starts from 1. + + page_size : typing.Optional[int] + The number of dialogue turns returned per page. + request_options : typing.Optional[RequestOptions] Request-specific configuration. @@ -850,6 +876,10 @@ async def get_turns( _response = await self._client_wrapper.httpx_client.request( f"v2/projects/{jsonable_encoder(appid)}/agents/{jsonable_encoder(agent_id)}/turns", method="GET", + params={ + "page_index": page_index, + "page_size": page_size, + }, request_options=request_options, ) try: @@ -871,7 +901,7 @@ async def stop( self, appid: str, agent_id: str, *, request_options: typing.Optional[RequestOptions] = None ) -> AsyncHttpResponse[None]: """ - Stop the specified conversational agent instance. + Stop the specified conversational agent instance. The API responds after request parameters are validated, and the stop operation is processed asynchronously after the response is returned. 
Parameters ---------- diff --git a/src/agora_agent/agents/types/get_agents_response.py b/src/agora_agent/agents/types/get_agents_response.py index fe7e8e9..fd0aa2b 100644 --- a/src/agora_agent/agents/types/get_agents_response.py +++ b/src/agora_agent/agents/types/get_agents_response.py @@ -32,7 +32,6 @@ class GetAgentsResponse(UncheckedBaseModel): - `RUNNING` (2): The agent is running. - `STOPPING` (3): The agent is stopping. - `STOPPED` (4): The agent has exited. - - `RECOVERING` (5): The agent is recovering. - `FAILED` (6): The agent failed to execute. """ diff --git a/src/agora_agent/agents/types/get_agents_response_status.py b/src/agora_agent/agents/types/get_agents_response_status.py index 2cef8c7..c170dc3 100644 --- a/src/agora_agent/agents/types/get_agents_response_status.py +++ b/src/agora_agent/agents/types/get_agents_response_status.py @@ -3,5 +3,5 @@ import typing GetAgentsResponseStatus = typing.Union[ - typing.Literal["IDLE", "STARTING", "RUNNING", "STOPPING", "STOPPED", "RECOVERING", "FAILED"], typing.Any + typing.Literal["IDLE", "STARTING", "RUNNING", "STOPPING", "STOPPED", "FAILED"], typing.Any ] diff --git a/src/agora_agent/agents/types/get_turns_agents_response.py b/src/agora_agent/agents/types/get_turns_agents_response.py index dca9ce1..653b3a5 100644 --- a/src/agora_agent/agents/types/get_turns_agents_response.py +++ b/src/agora_agent/agents/types/get_turns_agents_response.py @@ -5,10 +5,36 @@ import pydantic from ...core.pydantic_utilities import IS_PYDANTIC_V2 from ...core.unchecked_base_model import UncheckedBaseModel +from .get_turns_agents_response_pagination import GetTurnsAgentsResponsePagination from .get_turns_agents_response_turns_item import GetTurnsAgentsResponseTurnsItem class GetTurnsAgentsResponse(UncheckedBaseModel): + agent_id: typing.Optional[str] = pydantic.Field(default=None) + """ + The unique identifier of the agent. + """ + + name: typing.Optional[str] = pydantic.Field(default=None) + """ + The name of the agent. 
+ """ + + channel: typing.Optional[str] = pydantic.Field(default=None) + """ + The name of the RTC channel the agent joined. + """ + + total_turn_count: typing.Optional[int] = pydantic.Field(default=None) + """ + The total number of dialogue turns in the current session. + """ + + pagination: typing.Optional[GetTurnsAgentsResponsePagination] = pydantic.Field(default=None) + """ + Pagination information. + """ + turns: typing.Optional[typing.List[GetTurnsAgentsResponseTurnsItem]] = pydantic.Field(default=None) """ A list of conversation turns for the agent session. diff --git a/src/agora_agent/agent_management/types/agent_think_response.py b/src/agora_agent/agents/types/get_turns_agents_response_pagination.py similarity index 56% rename from src/agora_agent/agent_management/types/agent_think_response.py rename to src/agora_agent/agents/types/get_turns_agents_response_pagination.py index 3a3c646..8330f7c 100644 --- a/src/agora_agent/agent_management/types/agent_think_response.py +++ b/src/agora_agent/agents/types/get_turns_agents_response_pagination.py @@ -7,20 +7,24 @@ from ...core.unchecked_base_model import UncheckedBaseModel -class AgentThinkResponse(UncheckedBaseModel): - agent_id: typing.Optional[str] = pydantic.Field(default=None) +class GetTurnsAgentsResponsePagination(UncheckedBaseModel): """ - Unique identifier of the agent instance. + Pagination information. """ - channel: typing.Optional[str] = pydantic.Field(default=None) + page_index: typing.Optional[int] = pydantic.Field(default=None) """ - The name of the RTC channel where the agent is located. + The current page number; starts from 1. """ - start_ts: typing.Optional[int] = pydantic.Field(default=None) + total_pages: typing.Optional[int] = pydantic.Field(default=None) """ - Timestamp indicating when the agent was created. + The total number of pages. + """ + + is_last_page: typing.Optional[bool] = pydantic.Field(default=None) + """ + True if the current page is the last page. 
""" if IS_PYDANTIC_V2: diff --git a/src/agora_agent/agents/types/get_turns_agents_response_turns_item_end_metadata.py b/src/agora_agent/agents/types/get_turns_agents_response_turns_item_end_metadata.py index 0d9c61e..1e69709 100644 --- a/src/agora_agent/agents/types/get_turns_agents_response_turns_item_end_metadata.py +++ b/src/agora_agent/agents/types/get_turns_agents_response_turns_item_end_metadata.py @@ -28,8 +28,8 @@ class GetTurnsAgentsResponseTurnsItemEndMetadata(UncheckedBaseModel): - `api_leave`: The turn was interrupted because the agent left the channel. When `type` is `ignored`, possible values are: - - `semantic`: The turn was ignored because semantic end-of-speech detection determined no response was required. - - `keywords`: The turn was ignored because the start keyword was not detected. + - `semantic`: The turn was ignored because semantic end-of-speech detection determined no response was required. Applies when `turn_detection.config.end_of_speech.mode` is set to `semantic`. + - `keywords`: The turn was ignored because the start keyword was not detected. Applies when `turn_detection.config.start_of_speech.mode` is set to `keywords`. - `disable`: The turn was ignored because interruption is disabled for this turn. """ diff --git a/src/agora_agent/agents/types/get_turns_agents_response_turns_item_metrics_segmented_latency_ms_item.py b/src/agora_agent/agents/types/get_turns_agents_response_turns_item_metrics_segmented_latency_ms_item.py index 345d970..e849fdc 100644 --- a/src/agora_agent/agents/types/get_turns_agents_response_turns_item_metrics_segmented_latency_ms_item.py +++ b/src/agora_agent/agents/types/get_turns_agents_response_turns_item_metrics_segmented_latency_ms_item.py @@ -14,16 +14,16 @@ class GetTurnsAgentsResponseTurnsItemMetricsSegmentedLatencyMsItem(UncheckedBase When the LLM input modality is `text`, the returned segments are: - `algorithm_processing`: Algorithm processing delay. 
- - `asr_ttlw`: ASR Time To Last Word (TTLW) in milliseconds. - - `llm_ttft`: LLM Time To First Token (TTFT) in milliseconds. - - `llm_ftfs`: LLM First Token To First Sentence (FTFS) in milliseconds. - - `tts_ttfb`: TTS Time To First Byte (TTFB) in milliseconds. + - `asr_ttlw`: The ASR Time To Last Word (TTLW) in milliseconds. Represents the delay from when the user finishes speaking to when the ASR module outputs the last word. + - `llm_ttft`: The LLM Time To First Token (TTFT) in milliseconds. Represents the delay from when the user finishes speaking to when the LLM outputs the first token. + - `llm_ftfs`: The LLM First Token To First Sentence (FTFS) in milliseconds. Represents the delay from when the LLM outputs the first token to when it outputs the first complete sentence. + - `tts_ttfb`: The TTS Time To First Byte (TTFB) in milliseconds. Represents the delay from when the TTS module receives a text request to when it outputs the first audio byte. - `transport`: Network transmission delay in milliseconds. Not returned when the user is connected using the RTC Web SDK. When the LLM input modality is `audio`, the returned segments are: - `algorithm_processing`: Algorithm processing delay. - - `asr_ttlw`: ASR Time To Last Word (TTLW) in milliseconds. - - `llm_ttfa`: LLM Time To First Audio Byte (TTFA) in milliseconds. + - `asr_ttlw`: The ASR Time To Last Word (TTLW) in milliseconds. Represents the delay from when the user finishes speaking to when the ASR module outputs the last word. + - `llm_ttfa`: The LLM Time To First Audio Byte (TTFA) in milliseconds. Represents the delay from when the user finishes speaking to when the LLM outputs the first audio byte. - `transport`: Network transmission delay in milliseconds. Not returned when the user is connected using the RTC Web SDK. 
""" diff --git a/src/agora_agent/agents/types/interrupt_agents_response.py b/src/agora_agent/agents/types/interrupt_agents_response.py index 79ff7f5..f8c59af 100644 --- a/src/agora_agent/agents/types/interrupt_agents_response.py +++ b/src/agora_agent/agents/types/interrupt_agents_response.py @@ -20,7 +20,7 @@ class InterruptAgentsResponse(UncheckedBaseModel): start_ts: typing.Optional[int] = pydantic.Field(default=None) """ - Timestamp when the broadcast started + Unix timestamp in seconds when the interrupt request was processed. """ if IS_PYDANTIC_V2: diff --git a/src/agora_agent/agents/types/list_agents_response_data_list_item_status.py b/src/agora_agent/agents/types/list_agents_response_data_list_item_status.py index 58c42e0..a00c386 100644 --- a/src/agora_agent/agents/types/list_agents_response_data_list_item_status.py +++ b/src/agora_agent/agents/types/list_agents_response_data_list_item_status.py @@ -3,5 +3,5 @@ import typing ListAgentsResponseDataListItemStatus = typing.Union[ - typing.Literal["IDLE", "STARTING", "RUNNING", "STOPPING", "STOPPED", "RECOVERING", "FAILED"], typing.Any + typing.Literal["IDLE", "STARTING", "RUNNING", "STOPPING", "STOPPED", "FAILED"], typing.Any ] diff --git a/src/agora_agent/agents/types/start_agents_request_properties_advanced_features.py b/src/agora_agent/agents/types/start_agents_request_properties_advanced_features.py index 78250d7..e75f098 100644 --- a/src/agora_agent/agents/types/start_agents_request_properties_advanced_features.py +++ b/src/agora_agent/agents/types/start_agents_request_properties_advanced_features.py @@ -14,7 +14,7 @@ class StartAgentsRequestPropertiesAdvancedFeatures(UncheckedBaseModel): enable_mllm: typing.Optional[bool] = pydantic.Field(default=None) """ - Use `mllm.enable` instead. Enable Multimodal Large Language Model for voice-to-voice processing. Enabling MLLM automatically disables ASR, LLM, and TTS since the MLLM handles end-to-end voice processing directly. 
See `turn_detection.mode` for turn detection options available with MLLM. + Use `mllm.enable` instead. Enable Multimodal Large Language Model for voice-to-voice processing. Enabling MLLM automatically disables ASR, LLM, and TTS since the MLLM handles end-to-end voice processing directly. See `turn_detection.type` for turn detection options available with MLLM. """ enable_rtm: typing.Optional[bool] = pydantic.Field(default=None) diff --git a/src/agora_agent/agents/types/start_agents_request_properties_avatar.py b/src/agora_agent/agents/types/start_agents_request_properties_avatar.py index 24e3888..8993b2c 100644 --- a/src/agora_agent/agents/types/start_agents_request_properties_avatar.py +++ b/src/agora_agent/agents/types/start_agents_request_properties_avatar.py @@ -24,6 +24,7 @@ class StartAgentsRequestPropertiesAvatar(UncheckedBaseModel): - `akool`: Akool (Beta) - `liveavatar`: LiveAvatar (Beta) - `anam`: Anam (Beta) + - `generic`: Generic (Beta) """ params: typing.Optional[typing.Dict[str, typing.Any]] = pydantic.Field(default=None) diff --git a/src/agora_agent/agents/types/start_agents_request_properties_avatar_vendor.py b/src/agora_agent/agents/types/start_agents_request_properties_avatar_vendor.py index 2926d04..e5bcec5 100644 --- a/src/agora_agent/agents/types/start_agents_request_properties_avatar_vendor.py +++ b/src/agora_agent/agents/types/start_agents_request_properties_avatar_vendor.py @@ -3,5 +3,5 @@ import typing StartAgentsRequestPropertiesAvatarVendor = typing.Union[ - typing.Literal["akool", "liveavatar", "anam", "heygen"], typing.Any + typing.Literal["akool", "liveavatar", "anam", "generic", "heygen"], typing.Any ] diff --git a/src/agora_agent/agents/types/start_agents_request_properties_filler_words_content_static_config.py b/src/agora_agent/agents/types/start_agents_request_properties_filler_words_content_static_config.py index e220739..7fe264c 100644 --- 
a/src/agora_agent/agents/types/start_agents_request_properties_filler_words_content_static_config.py +++ b/src/agora_agent/agents/types/start_agents_request_properties_filler_words_content_static_config.py @@ -15,7 +15,7 @@ class StartAgentsRequestPropertiesFillerWordsContentStaticConfig(UncheckedBaseMo Static filler word configuration. Used when `mode` is `static`. """ - phrases: typing.Optional[typing.List[str]] = pydantic.Field(default=None) + phrases: typing.List[str] = pydantic.Field() """ List of filler word phrases. Maximum 100 filler words, each not exceeding 50 English words. """ diff --git a/src/agora_agent/agents/types/start_agents_request_properties_llm.py b/src/agora_agent/agents/types/start_agents_request_properties_llm.py index 20c391e..9ab0f62 100644 --- a/src/agora_agent/agents/types/start_agents_request_properties_llm.py +++ b/src/agora_agent/agents/types/start_agents_request_properties_llm.py @@ -44,7 +44,7 @@ class StartAgentsRequestPropertiesLlm(UncheckedBaseModel): """ LLM input modalities: - `["text"]`: Text only - - `["text", "image"]`: Text plus image; requires the selected LLM to support visual input + - `["text", "image"]`: Text plus image. Recommended configuration, requires the selected LLM to support visual input """ output_modalities: typing.Optional[typing.List[str]] = pydantic.Field(default=None) diff --git a/src/agora_agent/agents/types/start_agents_request_properties_llm_greeting_configs.py b/src/agora_agent/agents/types/start_agents_request_properties_llm_greeting_configs.py index a8594ee..c0d7046 100644 --- a/src/agora_agent/agents/types/start_agents_request_properties_llm_greeting_configs.py +++ b/src/agora_agent/agents/types/start_agents_request_properties_llm_greeting_configs.py @@ -27,6 +27,12 @@ class StartAgentsRequestPropertiesLlmGreetingConfigs(UncheckedBaseModel): The delay in milliseconds before the agent plays the greeting message after a user joins the channel. 
""" + interruptable: typing.Optional[bool] = pydantic.Field(default=None) + """ + - `true`: Follows the global `interruption` configuration. + - `false`: Uninterruptible. The greeting plays in its entirety. If the user speaks multiple times while the greeting plays, the system merges the speech segments after the greeting ends and sends them to the LLM for a single response. + """ + if IS_PYDANTIC_V2: model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 else: diff --git a/src/agora_agent/agents/types/start_agents_request_properties_mllm.py b/src/agora_agent/agents/types/start_agents_request_properties_mllm.py index e84422c..0993ebc 100644 --- a/src/agora_agent/agents/types/start_agents_request_properties_mllm.py +++ b/src/agora_agent/agents/types/start_agents_request_properties_mllm.py @@ -59,17 +59,7 @@ class StartAgentsRequestPropertiesMllm(UncheckedBaseModel): failure_message: typing.Optional[str] = pydantic.Field(default=None) """ - Message played when the MLLM call fails. - """ - - max_history: typing.Optional[int] = pydantic.Field(default=None) - """ - Maximum number of conversation history messages cached for the MLLM session. - """ - - predefined_tools: typing.Optional[typing.List[str]] = pydantic.Field(default=None) - """ - Predefined tools available to the MLLM provider. + Agent failure message. If provided, the agent speaks this message when an MLLM request fails. 
""" vendor: typing.Optional[StartAgentsRequestPropertiesMllmVendor] = pydantic.Field(default=None) @@ -78,6 +68,7 @@ class StartAgentsRequestPropertiesMllm(UncheckedBaseModel): - `openai`: OpenAI Realtime API - `gemini`: Google Gemini Live - `vertexai`: Google Gemini Live (Vertex AI) + - `xai`: xAI Grok Realtime API """ turn_detection: typing.Optional[StartAgentsRequestPropertiesMllmTurnDetection] = pydantic.Field(default=None) diff --git a/src/agora_agent/agents/types/start_agents_request_properties_mllm_turn_detection.py b/src/agora_agent/agents/types/start_agents_request_properties_mllm_turn_detection.py index 9298a0c..032979d 100644 --- a/src/agora_agent/agents/types/start_agents_request_properties_mllm_turn_detection.py +++ b/src/agora_agent/agents/types/start_agents_request_properties_mllm_turn_detection.py @@ -26,7 +26,7 @@ class StartAgentsRequestPropertiesMllmTurnDetection(UncheckedBaseModel): """ Turn detection mode for MLLM: - `agora_vad`: Agora VAD-based detection. - - `server_vad`: Vendor-side VAD-based detection. Supported by OpenAI Realtime API and Gemini Live. + - `server_vad`: Vendor-side VAD-based detection. Supported by OpenAI Realtime API, Gemini Live, and xAI Grok. - `semantic_vad`: Semantic-based detection. Supported by OpenAI Realtime API only. """ diff --git a/src/agora_agent/agents/types/start_agents_request_properties_mllm_turn_detection_server_vad_config.py b/src/agora_agent/agents/types/start_agents_request_properties_mllm_turn_detection_server_vad_config.py index d27b76e..c74d8d7 100644 --- a/src/agora_agent/agents/types/start_agents_request_properties_mllm_turn_detection_server_vad_config.py +++ b/src/agora_agent/agents/types/start_agents_request_properties_mllm_turn_detection_server_vad_config.py @@ -30,7 +30,7 @@ class StartAgentsRequestPropertiesMllmTurnDetectionServerVadConfig(UncheckedBase threshold: typing.Optional[float] = pydantic.Field(default=None) """ - VAD sensitivity threshold. Applicable to OpenAI Realtime API only. 
+ VAD sensitivity threshold. Applicable to OpenAI Realtime API and xAI Grok. """ idle_timeout_ms: typing.Optional[int] = pydantic.Field(default=None) diff --git a/src/agora_agent/agents/types/start_agents_request_properties_mllm_vendor.py b/src/agora_agent/agents/types/start_agents_request_properties_mllm_vendor.py index 519b143..0233696 100644 --- a/src/agora_agent/agents/types/start_agents_request_properties_mllm_vendor.py +++ b/src/agora_agent/agents/types/start_agents_request_properties_mllm_vendor.py @@ -2,4 +2,4 @@ import typing -StartAgentsRequestPropertiesMllmVendor = typing.Union[typing.Literal["openai", "gemini", "vertexai"], typing.Any] +StartAgentsRequestPropertiesMllmVendor = typing.Union[typing.Literal["openai", "gemini", "vertexai", "xai"], typing.Any] diff --git a/src/agora_agent/agents/types/start_agents_response.py b/src/agora_agent/agents/types/start_agents_response.py index ee3f32e..1d69a34 100644 --- a/src/agora_agent/agents/types/start_agents_response.py +++ b/src/agora_agent/agents/types/start_agents_response.py @@ -27,7 +27,6 @@ class StartAgentsResponse(UncheckedBaseModel): - `RUNNING` (2): The agent is running. - `STOPPING` (3): The agent is stopping. - `STOPPED` (4): The agent has exited. - - `RECOVERING` (5): The agent is recovering. - `FAILED` (6): The agent failed to execute. 
""" diff --git a/src/agora_agent/agents/types/start_agents_response_status.py b/src/agora_agent/agents/types/start_agents_response_status.py index 6926aca..894df21 100644 --- a/src/agora_agent/agents/types/start_agents_response_status.py +++ b/src/agora_agent/agents/types/start_agents_response_status.py @@ -3,5 +3,5 @@ import typing StartAgentsResponseStatus = typing.Union[ - typing.Literal["IDLE", "STARTING", "RUNNING", "STOPPING", "STOPPED", "RECOVERING", "FAILED"], typing.Any + typing.Literal["IDLE", "STARTING", "RUNNING", "STOPPING", "STOPPED", "FAILED"], typing.Any ] diff --git a/src/agora_agent/agents/types/update_agents_response.py b/src/agora_agent/agents/types/update_agents_response.py index 9f73945..8951b77 100644 --- a/src/agora_agent/agents/types/update_agents_response.py +++ b/src/agora_agent/agents/types/update_agents_response.py @@ -27,7 +27,6 @@ class UpdateAgentsResponse(UncheckedBaseModel): - `RUNNING` (2): The agent is running. - `STOPPING` (3): The agent is stopping. - `STOPPED` (4): The agent has exited. - - `RECOVERING` (5): The agent is recovering. - `FAILED` (6): The agent failed to execute. 
""" diff --git a/src/agora_agent/agents/types/update_agents_response_status.py b/src/agora_agent/agents/types/update_agents_response_status.py index 7f4078f..c8e9133 100644 --- a/src/agora_agent/agents/types/update_agents_response_status.py +++ b/src/agora_agent/agents/types/update_agents_response_status.py @@ -3,5 +3,5 @@ import typing UpdateAgentsResponseStatus = typing.Union[ - typing.Literal["IDLE", "STARTING", "RUNNING", "STOPPING", "STOPPED", "RECOVERING", "FAILED"], typing.Any + typing.Literal["IDLE", "STARTING", "RUNNING", "STOPPING", "STOPPED", "FAILED"], typing.Any ] diff --git a/src/agora_agent/core/client_wrapper.py b/src/agora_agent/core/client_wrapper.py index c5a0e03..ed2ed34 100644 --- a/src/agora_agent/core/client_wrapper.py +++ b/src/agora_agent/core/client_wrapper.py @@ -26,10 +26,10 @@ def __init__( def get_headers(self) -> typing.Dict[str, str]: headers: typing.Dict[str, str] = { - "User-Agent": "agora-agent-server-sdk/v1.4.0", + "User-Agent": "agora-agent-server-sdk/v1.5.0", "X-Fern-Language": "Python", "X-Fern-SDK-Name": "agora-agent-server-sdk", - "X-Fern-SDK-Version": "v1.4.0", + "X-Fern-SDK-Version": "v1.5.0", **(self.get_custom_headers() or {}), } headers["Authorization"] = httpx.BasicAuth(self._get_username(), self._get_password())._auth_header diff --git a/src/agora_agent/types/agent_error_response.py b/src/agora_agent/types/agent_error_response.py new file mode 100644 index 0000000..510ce74 --- /dev/null +++ b/src/agora_agent/types/agent_error_response.py @@ -0,0 +1,46 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +import pydantic +from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ..core.unchecked_base_model import UncheckedBaseModel +from .agent_error_response_reason import AgentErrorResponseReason + + +class AgentErrorResponse(UncheckedBaseModel): + """ + Error response returned when a Conversational AI Agent Management API request fails. 
+ + Common HTTP status codes include: + - `400`: Invalid request parameters. + - `401`: Authentication failed. + - `403`: Unauthorized access or service not enabled. + - `404`: Agent not found or has exited. + - `409`: Agent conflict. + - `422`: Access limit exceeded. + - `429`: Request rate limit exceeded. + - `500`: Internal server error. + - `502`: Gateway error. + - `503`: Agent startup failure. + - `504`: Request timeout. + """ + + detail: typing.Optional[str] = pydantic.Field(default=None) + """ + Error detail message. + """ + + reason: typing.Optional[AgentErrorResponseReason] = pydantic.Field(default=None) + """ + Error reason code. + """ + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/src/agora_agent/types/agent_error_response_reason.py b/src/agora_agent/types/agent_error_response_reason.py new file mode 100644 index 0000000..c7144cf --- /dev/null +++ b/src/agora_agent/types/agent_error_response_reason.py @@ -0,0 +1,24 @@ +# This file was auto-generated by Fern from our API Definition. 
+ +import typing + +AgentErrorResponseReason = typing.Union[ + typing.Literal[ + "ServiceNotEnabled", + "AccountSuspended", + "InternalError", + "InvalidPermission", + "InvalidRequestBody", + "MissingRequiredField", + "InvalidFieldValue", + "ResourceQuotaLimitExceeded", + "ConcurrencyLimitExceeded", + "ServiceUnavailable", + "ResourceAllocationFailed", + "TaskConflict", + "TaskNotFound", + "TaskOperationTimeout", + "NotImplemented", + ], + typing.Any, +] diff --git a/src/agora_agent/types/open_ai_tts_params.py b/src/agora_agent/types/open_ai_tts_params.py index 67a6e1d..f042c1f 100644 --- a/src/agora_agent/types/open_ai_tts_params.py +++ b/src/agora_agent/types/open_ai_tts_params.py @@ -14,7 +14,7 @@ class OpenAiTtsParams(UncheckedBaseModel): api_key: typing.Optional[str] = pydantic.Field(default=None) """ - OpenAI API key + OpenAI API key. Optional for preset-backed OpenAI TTS usage. """ voice: str = pydantic.Field() diff --git a/src/agora_agent/types/telephony_error_response.py b/src/agora_agent/types/telephony_error_response.py new file mode 100644 index 0000000..af4e7d4 --- /dev/null +++ b/src/agora_agent/types/telephony_error_response.py @@ -0,0 +1,34 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +import pydantic +from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ..core.unchecked_base_model import UncheckedBaseModel + + +class TelephonyErrorResponse(UncheckedBaseModel): + """ + Error response returned when a Telephony or Phone Number Management API request fails. + + The response body includes `error_type` and `description` fields (not the Agent Management `detail`/`reason` shape). + """ + + error_type: typing.Optional[str] = pydantic.Field(default=None) + """ + The type of error that occurred. + """ + + description: typing.Optional[str] = pydantic.Field(default=None) + """ + A detailed description of the error. 
+ """ + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/tests/custom/test_client.py b/tests/custom/test_client.py index 77fae36..ab04ce6 100644 --- a/tests/custom/test_client.py +++ b/tests/custom/test_client.py @@ -1,265 +1,7 @@ -from agora_agent.agentkit.agent import Agent, AdvancedFeatures, InterruptionConfig, MllmTurnDetectionConfig, TurnDetectionConfig -from agora_agent.agentkit.constants import TurnDetectionTypeValues -import asyncio -import warnings -from agora_agent.agentkit.agent_session import AgentSession, AsyncAgentSession -from agora_agent.agentkit.vendors import DeepgramTTS, HeyGenAvatar, MicrosoftTTS, OpenAI, OpenAIRealtime -from agora_agent.agentkit import AgentThinkResponse -from typing import Any, Dict, List, Tuple +import pytest -class _AgentManagementStub: - def __init__(self) -> None: - self.calls: List[Tuple[str, str, Dict[str, Any]]] = [] - - def agent_think(self, appid, agent_id, **kwargs): # noqa: ANN001 - self.calls.append((appid, agent_id, kwargs)) - return AgentThinkResponse(agent_id=agent_id, channel="room", start_ts=1) - - -class _ClientStub: - auth_mode = "basic" - - def __init__(self) -> None: - self.agents = object() - self.agent_management = _AgentManagementStub() - - -class _AsyncAgentManagementStub: - def __init__(self) -> None: - self.calls: List[Tuple[str, str, Dict[str, Any]]] = [] - - async def agent_think(self, appid, agent_id, **kwargs): # noqa: ANN001 - self.calls.append((appid, agent_id, kwargs)) - return AgentThinkResponse(agent_id=agent_id, channel="room", start_ts=1) - - -class _AsyncClientStub: - auth_mode = "basic" - - def __init__(self) -> None: - self.agents = object() - self.agent_management = _AsyncAgentManagementStub() - - -def test_agentkit_think_routes_to_agent_management() -> None: - client = 
_ClientStub() - session = AgentSession( - client=client, - agent=Agent(), - app_id="appid", - name="agent", - channel="room", - token="token", - agent_uid="1", - remote_uids=["2"], - ) - session._status = "running" - session._agent_id = "agent-1" - - response = session.think("Injected instruction", on_thinking_action="interrupt") - assert response.agent_id == "agent-1" - assert len(client.agent_management.calls) == 1 - appid, agent_id, kwargs = client.agent_management.calls[0] - assert appid == "appid" - assert agent_id == "agent-1" - assert kwargs["text"] == "Injected instruction" - assert kwargs["on_thinking_action"] == "interrupt" - - -def test_async_agentkit_think_routes_to_agent_management() -> None: - async def _run() -> None: - client = _AsyncClientStub() - session = AsyncAgentSession( - client=client, - agent=Agent(), - app_id="appid", - name="agent", - channel="room", - token="token", - agent_uid="1", - remote_uids=["2"], - ) - session._status = "running" - session._agent_id = "agent-1" - - response = await session.think("Injected instruction", on_thinking_action="interrupt") - assert response.agent_id == "agent-1" - assert len(client.agent_management.calls) == 1 - appid, agent_id, kwargs = client.agent_management.calls[0] - assert appid == "appid" - assert agent_id == "agent-1" - assert kwargs["text"] == "Injected instruction" - assert kwargs["on_thinking_action"] == "interrupt" - - asyncio.run(_run()) - - -def test_llm_vendor_headers_are_forwarded_to_properties() -> None: - agent = Agent().with_llm( - OpenAI( - api_key="openai-key", - model="gpt-4o-mini", - headers={"X-Trace-Id": "trace-123"}, - output_modalities=["text", "audio"], - greeting_configs={"mode": "single_first"}, - template_variables={"caller_name": "Ada"}, - ) - ).with_tts(MicrosoftTTS(key="tts-key", region="eastus", voice_name="en-US-JennyNeural")) - - props = agent.to_properties( - channel="room", - token="rtc-token", - agent_uid="1", - remote_uids=["2"], - ) - - assert props.llm is not 
None - assert props.llm.headers == {"X-Trace-Id": "trace-123"} - assert props.llm.output_modalities == ["text", "audio"] - assert props.llm.greeting_configs is not None - assert props.llm.greeting_configs.mode == "single_first" - assert props.llm.template_variables == {"caller_name": "Ada"} - - -def test_with_turn_detection_forwards_config() -> None: - turn_detection = TurnDetectionConfig( - type=TurnDetectionTypeValues.AGORA_VAD, - threshold=0.5, - ) - - props = Agent().with_turn_detection(turn_detection).to_properties( - channel="room", - token="rtc-token", - agent_uid="1", - remote_uids=["2"], - skip_vendor_validation=True, - ) - - assert props.turn_detection == turn_detection - - -def test_with_interruption_forwards_config() -> None: - interruption = InterruptionConfig( - enable=False, - disabled_config={"strategy": "ignore"}, - ) - - props = Agent().with_interruption(interruption).to_properties( - channel="room", - token="rtc-token", - agent_uid="1", - remote_uids=["2"], - skip_vendor_validation=True, - ) - - assert props.interruption == interruption - - -def test_mllm_turn_detection_is_forwarded_without_legacy_style() -> None: - mllm_turn_detection = MllmTurnDetectionConfig( - mode="server_vad", - server_vad_config={"idle_timeout_ms": 5000}, - ) - props = Agent().with_mllm( - OpenAIRealtime(api_key="openai-key", turn_detection=mllm_turn_detection) - ).to_properties( - channel="room", - token="rtc-token", - agent_uid="1", - remote_uids=["2"], - ) - - assert props.mllm is not None - assert props.mllm.vendor == "openai" - assert "style" not in props.mllm.dict() - assert props.mllm.turn_detection == mllm_turn_detection - - -def test_with_mllm_sets_mllm_enable_without_legacy_flag() -> None: - agent = Agent().with_mllm(OpenAIRealtime(api_key="openai-key")) - - props = agent.to_properties( - channel="room", - token="rtc-token", - agent_uid="1", - remote_uids=["2"], - ) - - assert props.mllm is not None - assert props.mllm.enable is True - assert 
props.advanced_features is None - - -def test_with_mllm_removes_deprecated_enable_mllm_from_existing_advanced_features() -> None: - agent = Agent( - advanced_features=AdvancedFeatures(enable_mllm=True, enable_rtm=True) - ).with_mllm(OpenAIRealtime(api_key="openai-key")) - - props = agent.to_properties( - channel="room", - token="rtc-token", - agent_uid="1", - remote_uids=["2"], - ) - - assert props.mllm is not None - assert props.mllm.enable is True - assert props.advanced_features is not None - assert props.advanced_features.enable_mllm is None - assert props.advanced_features.enable_rtm is True - - -def test_with_mllm_drops_advanced_features_when_only_deprecated_enable_mllm_was_set() -> None: - props = Agent( - advanced_features=AdvancedFeatures(enable_mllm=True) - ).with_mllm(OpenAIRealtime(api_key="openai-key")).to_properties( - channel="room", - token="rtc-token", - agent_uid="1", - remote_uids=["2"], - ) - - assert props.mllm is not None - assert props.mllm.enable is True - assert props.advanced_features is None - - -def test_with_tools_sets_enable_tools() -> None: - props = Agent().with_tools().to_properties( - channel="room", - token="rtc-token", - agent_uid="1", - remote_uids=["2"], - skip_vendor_validation=True, - ) - - assert props.advanced_features is not None - assert props.advanced_features.enable_tools is True - - -def test_heygen_avatar_emits_deprecation_warning() -> None: - with warnings.catch_warnings(record=True) as caught: - warnings.simplefilter("always") - HeyGenAvatar(api_key="heygen-key", quality="high", agora_uid="42") - - assert any("HeyGenAvatar is deprecated" in str(warning.message) for warning in caught) - - -def test_deepgram_tts_vendor_config() -> None: - tts = DeepgramTTS( - api_key="deepgram-key", - model="aura-2-thalia-en", - base_url="wss://api.deepgram.com/v1/speak", - sample_rate=24000, - params={"encoding": "linear16"}, - ).to_config() - - assert tts["vendor"] == "deepgram" - assert tts["params"] == { - "api_key": 
"deepgram-key", - "model": "aura-2-thalia-en", - "base_url": "wss://api.deepgram.com/v1/speak", - "sample_rate": 24000, - "encoding": "linear16", - } +# Get started with writing tests with pytest at https://docs.pytest.org +@pytest.mark.skip(reason="Unimplemented") +def test_client() -> None: + assert True diff --git a/tests/custom/test_presets.py b/tests/custom/test_presets.py deleted file mode 100644 index c05c477..0000000 --- a/tests/custom/test_presets.py +++ /dev/null @@ -1,135 +0,0 @@ -from agora_agent.agentkit.presets import resolve_session_presets - - -def test_minimax_preset_strips_group_id_and_url_when_no_key() -> None: - """When no key is provided, preset is inferred and credential fields are stripped.""" - properties = { - "tts": { - "vendor": "minimax", - "params": { - "group_id": "my-group", - "model": "speech-2.6-turbo", - "url": "wss://api-uw.minimax.io/ws/v1/t2a_v2", - "voice_setting": {"voice_id": "English_captivating_female1"}, - }, - } - } - preset, resolved = resolve_session_presets(None, properties) - assert preset == "minimax_speech_2_6_turbo" - params = resolved["tts"]["params"] - assert "group_id" not in params - assert "url" not in params - assert "model" not in params - assert params["voice_setting"]["voice_id"] == "English_captivating_female1" - - -def test_minimax_preset_strips_group_id_and_url_for_28_turbo() -> None: - properties = { - "tts": { - "vendor": "minimax", - "params": { - "group_id": "org-123", - "model": "speech-2.8-turbo", - "url": "wss://api.minimax.io/ws/v1/t2a_v2", - "voice_setting": {"voice_id": "some-voice"}, - }, - } - } - preset, resolved = resolve_session_presets(None, properties) - assert preset == "minimax_speech_2_8_turbo" - params = resolved["tts"]["params"] - assert "group_id" not in params - assert "url" not in params - assert "model" not in params - - -def test_minimax_preset_strips_group_id_and_url_with_underscore_model_name() -> None: - properties = { - "tts": { - "vendor": "minimax", - "params": { - 
"group_id": "my-group", - "model": "speech_2_6_turbo", - "url": "wss://api-uw.minimax.io/ws/v1/t2a_v2", - }, - } - } - preset, resolved = resolve_session_presets(None, properties) - assert preset == "minimax_speech_2_6_turbo" - params = resolved["tts"].get("params") or {} - assert "group_id" not in params - assert "url" not in params - assert "model" not in params - - -def test_minimax_preset_not_inferred_when_key_present() -> None: - """When user provides their own key, preset is NOT inferred and nothing is stripped.""" - properties = { - "tts": { - "vendor": "minimax", - "params": { - "key": "user-secret", - "group_id": "my-group", - "model": "speech-2.6-turbo", - }, - } - } - preset, resolved = resolve_session_presets(None, properties) - assert preset is None - params = resolved["tts"]["params"] - assert params.get("key") == "user-secret" - assert params.get("group_id") == "my-group" - - -def test_minimax_preset_not_inferred_when_explicit_preset_given() -> None: - """When an explicit tts preset is provided, tts inference is skipped.""" - properties = { - "tts": { - "vendor": "minimax", - "params": { - "group_id": "my-group", - "model": "speech-2.6-turbo", - }, - } - } - preset, resolved = resolve_session_presets("minimax_speech_2_6_turbo", properties) - assert preset == "minimax_speech_2_6_turbo" - # Explicit preset: tts inference is skipped, params are NOT stripped - params = resolved["tts"]["params"] - assert params.get("group_id") == "my-group" - - -def test_deepgram_preset_strips_model_and_api_key() -> None: - properties = { - "asr": { - "vendor": "deepgram", - "params": { - "model": "nova-3", - "language": "en-US", - }, - } - } - preset, resolved = resolve_session_presets(None, properties) - assert preset == "deepgram_nova_3" - params = resolved["asr"]["params"] - assert "model" not in params - assert "api_key" not in params - assert params.get("language") == "en-US" - - -def test_openai_llm_preset_strips_model_api_key_and_default_url() -> None: - 
properties = { - "llm": { - "vendor": "openai", - "url": "https://api.openai.com/v1/chat/completions", - "params": { - "model": "gpt-4o-mini", - }, - } - } - preset, resolved = resolve_session_presets(None, properties) - assert preset == "openai_gpt_4o_mini" - llm = resolved["llm"] - assert "api_key" not in llm - assert "url" not in llm - assert "model" not in (llm.get("params") or {})