Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions python/packages/gemini/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@ pip install agent-framework-gemini --pre

The Gemini integration enables Microsoft Agent Framework applications to call Google Gemini models with familiar chat abstractions, including streaming, tool/function calling, and structured output.

## Structured Output

Gemini structured output can be configured in one of three ways: a Pydantic model in `response_format`, a JSON schema mapping in `response_format`, or a Gemini-specific `response_schema`. Declarative agents that define `outputSchema` pass that schema through `response_format`.

## Authentication

The connector supports both `google-genai` authentication modes.
Expand Down
97 changes: 92 additions & 5 deletions python/packages/gemini/agent_framework_gemini/_chat_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import logging
import sys
from collections.abc import AsyncIterable, Awaitable, Mapping, Sequence
from typing import Any, ClassVar, Generic, cast
from typing import Any, ClassVar, Generic, TypeGuard, cast
from uuid import uuid4

from agent_framework import (
Expand Down Expand Up @@ -109,8 +109,8 @@ class GeminiChatOptions(ChatOptions[ResponseModelT], Generic[ResponseModelT], to
or ``types.Tool`` objects returned by ``get_code_interpreter_tool``, ``get_web_search_tool``,
``get_mcp_tool``, ``get_file_search_tool``, or ``get_maps_grounding_tool``.
tool_choice: How the model picks a tool. One of ``'auto'``, ``'none'``, or ``'required'``.
response_format: Pydantic model type for structured JSON output. The response text is
parsed into the model and exposed via ``ChatResponse.value``.
response_format: Pydantic model type or JSON schema mapping for structured JSON output.
The response text is parsed and exposed via ``ChatResponse.value``.
instructions: Extra system-level instructions prepended to the system message.

Not supported, and passing these raises a type error:
Expand Down Expand Up @@ -255,6 +255,40 @@ def _validate_client_auth_configuration(

# Union of ``_OPTION_EXPLICIT_KEYS`` and ``_OPTION_CONSUMED_KEYS``.
# NOTE(review): presumably the option keys skipped by the generic option pass-through — confirm at the use site.
_OPTION_EXCLUDE_KEYS: frozenset[str] = _OPTION_EXPLICIT_KEYS | _OPTION_CONSUMED_KEYS

_JSON_SCHEMA_TYPES: frozenset[str] = frozenset({
"array",
"boolean",
"integer",
"null",
"number",
"object",
"string",
})

_JSON_SCHEMA_KEYWORDS: frozenset[str] = frozenset({
"$defs",
"additionalProperties",
"allOf",
"anyOf",
"enum",
"items",
"oneOf",
"properties",
"required",
"type",
})


def _is_str_mapping(value: Any) -> TypeGuard[Mapping[str, Any]]:
"""Narrow ``Any`` to ``Mapping[str, Any]`` (pyright doesn't infer key type from ``isinstance``)."""
return isinstance(value, Mapping)


def _is_non_string_sequence(value: Any) -> TypeGuard[Sequence[Any]]:
"""Narrow ``Any`` to ``Sequence[Any]``, excluding ``str``/``bytes``."""
return isinstance(value, Sequence) and not isinstance(value, (str, bytes))


_FINISH_REASON_MAP: dict[str, FinishReasonLiteral] = {
"STOP": "stop",
"MAX_TOKENS": "length",
Expand Down Expand Up @@ -747,9 +781,13 @@ def _prepare_config(
continue
kwargs[_OPTION_TRANSLATIONS.get(key, key)] = value

if options.get("response_format") or options.get("response_schema"):
response_format = options.get("response_format")
response_schema = options.get("response_schema")
if response_format is not None or response_schema is not None:
kwargs["response_mime_type"] = "application/json"
if schema := options.get("response_schema"):
if response_schema is not None:
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This establishes response_schema precedence for the outbound Gemini config, but the inbound parsing path still ignores that precedence: _inner_get_response() passes validated.get("response_format") into _process_generate_response() at line 528, and the streaming path passes only options.get("response_format") into _build_response_stream() at line 522. If a caller sets both fields, Gemini will be constrained by response_schema while ChatResponse.value is parsed according to response_format, silently diverging from the precedence asserted here.

kwargs["response_schema"] = response_schema
elif (schema := self._extract_response_schema(response_format)) is not None:
kwargs["response_schema"] = schema
if tools := self._prepare_tools(options):
kwargs["tools"] = tools
Expand All @@ -762,6 +800,55 @@ def _prepare_config(

return types.GenerateContentConfig(**kwargs)

@staticmethod
def _extract_response_schema(response_format: Any) -> dict[str, Any] | type[BaseModel] | None:
    """Derive the Gemini ``response_schema`` value from a ``response_format`` option.

    Args:
        response_format: A Pydantic model class, a mapping envelope, or any other value.

    Returns:
        The Pydantic class itself when one is given (google-genai's ``response_schema``
        accepts a ``type``); a ``dict`` copy of the schema found under ``format``,
        ``json_schema.schema`` or ``schema``, or of the mapping itself when it looks like
        a bare JSON Schema; ``None`` for anything else (e.g. plain JSON-object mode).
    """
    # Pydantic model classes are handed over untouched.
    if isinstance(response_format, type) and issubclass(response_format, BaseModel):
        return response_format

    if not _is_str_mapping(response_format):
        return None

    # ``format`` envelope: unwrap recursively, falling through when nothing usable is inside.
    format_config = response_format.get("format")
    if _is_str_mapping(format_config):
        nested = RawGeminiChatClient._extract_response_schema(format_config)
        if nested is not None:
            return nested

    # ``json_schema`` envelope carrying the schema under its ``schema`` key.
    json_schema = response_format.get("json_schema")
    if _is_str_mapping(json_schema):
        inner_schema = json_schema.get("schema")
        if _is_str_mapping(inner_schema):
            return dict(inner_schema)

    # Schema stored directly under ``schema``.
    direct_schema = response_format.get("schema")
    if _is_str_mapping(direct_schema):
        return dict(direct_schema)

    # Last resort: the mapping itself may already be a JSON Schema.
    if RawGeminiChatClient._is_json_schema_mapping(response_format):
        return dict(response_format)
    return None

@staticmethod
def _is_json_schema_mapping(value: Mapping[str, Any]) -> bool:
    """Tell whether a mapping looks like a JSON Schema instead of a response-format envelope.

    The mapping must contain at least one key from ``_JSON_SCHEMA_KEYWORDS``. If it has a
    ``type`` entry, that entry must be a name from ``_JSON_SCHEMA_TYPES`` or a non-string
    sequence made up only of such names; a missing (or ``None``) ``type`` is accepted.
    """
    if all(keyword not in value for keyword in _JSON_SCHEMA_KEYWORDS):
        return False

    declared_type = value.get("type")
    if declared_type is None:
        return True
    if isinstance(declared_type, str):
        return declared_type in _JSON_SCHEMA_TYPES
    if not _is_non_string_sequence(declared_type):
        return False

    # Every listed entry has to be a known type name.
    for entry in declared_type:
        if not isinstance(entry, str) or entry not in _JSON_SCHEMA_TYPES:
            return False
    return True

def _prepare_tools(self, options: Mapping[str, Any]) -> list[types.Tool] | None:
"""Translate the framework tool list into Gemini API tool objects.

Expand Down
Loading
Loading