From 3b79e58bf6c1779beec7b6dca412a5ba4829c7e1 Mon Sep 17 00:00:00 2001 From: SinhSinh An Date: Sat, 25 Apr 2026 11:12:34 -0500 Subject: [PATCH 1/2] feat(lib): add get_model_capabilities for runtime parameter routing Applications that support multiple OpenAI models (gpt-4.1, gpt-5.x, o-series, etc.) currently maintain prefix checks like `model.startswith("gpt-5")` to decide which parameters to send. These checks break every time a new family launches, and the version-based logic for reasoning_effort levels is reverse-engineered from documentation rather than provided by the SDK. Add a hand-curated capability registry under `openai.lib._models` exposing two public symbols: - `ModelCapabilities`: a frozen dataclass with `family`, `supports_temperature`, `supports_reasoning`, and `reasoning_effort_options` fields. - `get_model_capabilities(model)`: looks up a model identifier and returns the matching `ModelCapabilities`, or `None` if unknown. The lookup is date-suffix-aware (gpt-5.4-mini-2026-03-17 -> gpt-5.4), recognises size variants (-mini, -nano, -pro), and treats *-chat-latest and *-search-preview models as non-reasoning chat variants. Both symbols are re-exported from `openai` and `openai.lib`. Includes 35 unit tests covering each family, edge cases (unknown models, non-string inputs, frozen instances), and the realistic "route temperature/effort decisions by model" use case from the issue. helpers.md documents the new public surface. The registry lives in `src/openai/lib/`, which CONTRIBUTING.md designates as the area Stainless will not regenerate. 
Closes #3073 --- helpers.md | 24 +++ src/openai/__init__.py | 7 +- src/openai/lib/__init__.py | 1 + src/openai/lib/_models.py | 211 ++++++++++++++++++++++ tests/lib/test_model_capabilities.py | 259 +++++++++++++++++++++++++++ 5 files changed, 501 insertions(+), 1 deletion(-) create mode 100644 src/openai/lib/_models.py create mode 100644 tests/lib/test_model_capabilities.py diff --git a/helpers.md b/helpers.md index 89ff4498cf..de0631f97e 100644 --- a/helpers.md +++ b/helpers.md @@ -516,3 +516,27 @@ client.beta.vector_stores.file_batches.create_and_poll(...) client.beta.vector_stores.file_batches.upload_and_poll(...) client.videos.create_and_poll(...) ``` + +# Model Capability Helpers + +Different model families accept different request parameters. `temperature` is rejected by gpt-5.x reasoning models, `reasoning.effort` only applies to reasoning models, and the valid set of `effort` values varies between gpt-5, gpt-5.1+, and gpt-5.4+. + +The SDK ships a hand-curated capability registry so applications that support multiple models can decide which parameters to send (and which UI controls to render) without prefix-matching model strings themselves: + +```python +from openai import get_model_capabilities + +caps = get_model_capabilities("gpt-5.4-mini") +caps.family # "gpt-5.4" +caps.supports_temperature # False +caps.supports_reasoning # True +caps.reasoning_effort_options # ('none', 'minimal', 'low', 'medium', 'high', 'xhigh') + +get_model_capabilities("gpt-4.1").supports_reasoning # False +get_model_capabilities("gpt-5-chat-latest").supports_temperature # True (chat variant) +get_model_capabilities("nonexistent-model") # None +``` + +Capabilities are matched by longest-prefix and are date-suffix-aware (so `gpt-5.4-mini-2026-03-17` resolves to the `gpt-5.4` family). `*-chat-latest` and `*-search-preview` variants are recognized as non-reasoning chat models. + +`get_model_capabilities` returns `None` for unknown models. 
Treat that as "fall back to the API's own validation": send your default parameters and handle a 400 response if the model rejects them. The registry is updated as new model families ship; pin a specific SDK version if you need a stable view. diff --git a/src/openai/__init__.py b/src/openai/__init__.py index fc9675a8b5..ad0ed6515f 100644 --- a/src/openai/__init__.py +++ b/src/openai/__init__.py @@ -96,7 +96,12 @@ if not _t.TYPE_CHECKING: from ._utils._resources_proxy import resources as resources -from .lib import azure as _azure, pydantic_function_tool as pydantic_function_tool +from .lib import ( + ModelCapabilities as ModelCapabilities, + azure as _azure, + get_model_capabilities as get_model_capabilities, + pydantic_function_tool as pydantic_function_tool, +) from .version import VERSION as VERSION from .lib.azure import AzureOpenAI as AzureOpenAI, AsyncAzureOpenAI as AsyncAzureOpenAI from .lib._old_api import * diff --git a/src/openai/lib/__init__.py b/src/openai/lib/__init__.py index 5c6cb782c0..d948b6ab0c 100644 --- a/src/openai/lib/__init__.py +++ b/src/openai/lib/__init__.py @@ -1,2 +1,3 @@ from ._tools import pydantic_function_tool as pydantic_function_tool +from ._models import ModelCapabilities as ModelCapabilities, get_model_capabilities as get_model_capabilities from ._parsing import ResponseFormatT as ResponseFormatT diff --git a/src/openai/lib/_models.py b/src/openai/lib/_models.py new file mode 100644 index 0000000000..5aea2ec2ec --- /dev/null +++ b/src/openai/lib/_models.py @@ -0,0 +1,211 @@ +"""Convenience helpers for inspecting OpenAI model capabilities. + +This module is hand-maintained (not generated from the OpenAPI spec) because the +capability matrix is documented behaviour rather than schema. When OpenAI ships +a new model family, the registry in this file should be updated to match. 
+ +Example: + >>> from openai import get_model_capabilities + >>> caps = get_model_capabilities("gpt-5.4-mini") + >>> caps.supports_reasoning + True + >>> caps.reasoning_effort_options + ('none', 'minimal', 'low', 'medium', 'high', 'xhigh') + >>> get_model_capabilities("gpt-4.1").supports_reasoning + False +""" + +from __future__ import annotations + +from typing import Any, Tuple, Optional +from dataclasses import dataclass + +from ..types.shared.reasoning_effort import ReasoningEffort + +__all__ = ["ModelCapabilities", "get_model_capabilities"] + + +@dataclass(frozen=True) +class ModelCapabilities: + """Static capability metadata for an OpenAI model. + + Returned by :func:`get_model_capabilities`. All fields reflect the + *documented* behaviour of the model when called via the Chat Completions + or Responses APIs. They are not derived from a server-side source, so + edge cases (private deployments, beta flags, future model variants) may + differ. + """ + + family: str + """The model family identifier this capability set was matched against + (e.g. ``"gpt-5.4"``, ``"gpt-4o"``). + + Useful when dispatching on the family in addition to the exact model name.""" + + supports_temperature: bool + """Whether the model accepts the ``temperature`` parameter. + + Note: gpt-5.x reasoning models reject ``temperature`` whenever + ``reasoning_effort`` is anything other than ``"none"``. The conservative + default returned here is ``False`` for reasoning models, matching the + behaviour you should use unless you have explicitly opted into a + ``-chat-latest`` variant. + """ + + supports_reasoning: bool + """Whether the model accepts the ``reasoning`` parameter (Responses API) + or ``reasoning_effort`` (Chat Completions).""" + + reasoning_effort_options: Optional[Tuple[ReasoningEffort, ...]] + """Valid values for ``reasoning.effort``. + + ``None`` if the model does not support reasoning. 
Otherwise a tuple of + valid effort literals, in order of increasing intensity.""" + + +def _caps( + family: str, + *, + supports_temperature: bool, + supports_reasoning: bool, + reasoning_effort_options: Optional[Tuple[ReasoningEffort, ...]], +) -> ModelCapabilities: + return ModelCapabilities( + family=family, + supports_temperature=supports_temperature, + supports_reasoning=supports_reasoning, + reasoning_effort_options=reasoning_effort_options, + ) + + +# --------------------------------------------------------------------------- +# Family registry. +# +# Entries are matched by longest-prefix against the model string, with chat / +# search variants checked via the suffix test in `get_model_capabilities`. +# +# When OpenAI ships a new family, add an entry here. Order within this tuple +# does not matter; the lookup picks the longest matching prefix. +# --------------------------------------------------------------------------- + +# Effort scales reused across families. +_EFFORT_O_SERIES: Tuple[ReasoningEffort, ...] = ("low", "medium", "high") +_EFFORT_GPT5: Tuple[ReasoningEffort, ...] = ("minimal", "low", "medium", "high") +_EFFORT_GPT5_1: Tuple[ReasoningEffort, ...] = ("none", "minimal", "low", "medium", "high") +_EFFORT_GPT5_4: Tuple[ReasoningEffort, ...] = ("none", "minimal", "low", "medium", "high", "xhigh") + + +_FAMILIES: Tuple[ModelCapabilities, ...] = ( + # gpt-5.x reasoning models. Temperature is rejected unless you use a + # `-chat-latest` variant or set `reasoning_effort="none"`. 
+ _caps("gpt-5.4", supports_temperature=False, supports_reasoning=True, reasoning_effort_options=_EFFORT_GPT5_4), + _caps("gpt-5.3", supports_temperature=False, supports_reasoning=True, reasoning_effort_options=_EFFORT_GPT5_1), + _caps("gpt-5.2", supports_temperature=False, supports_reasoning=True, reasoning_effort_options=_EFFORT_GPT5_1), + _caps("gpt-5.1", supports_temperature=False, supports_reasoning=True, reasoning_effort_options=_EFFORT_GPT5_1), + _caps("gpt-5", supports_temperature=False, supports_reasoning=True, reasoning_effort_options=_EFFORT_GPT5), + # Classic chat families. + _caps("gpt-4.1", supports_temperature=True, supports_reasoning=False, reasoning_effort_options=None), + _caps("gpt-4o", supports_temperature=True, supports_reasoning=False, reasoning_effort_options=None), + _caps("gpt-4-turbo", supports_temperature=True, supports_reasoning=False, reasoning_effort_options=None), + _caps("gpt-4", supports_temperature=True, supports_reasoning=False, reasoning_effort_options=None), + _caps("gpt-3.5", supports_temperature=True, supports_reasoning=False, reasoning_effort_options=None), + _caps("chatgpt-4o-latest", supports_temperature=True, supports_reasoning=False, reasoning_effort_options=None), + # o-series reasoning models. 
+ _caps("o4-mini", supports_temperature=False, supports_reasoning=True, reasoning_effort_options=_EFFORT_O_SERIES), + _caps("o3-pro", supports_temperature=False, supports_reasoning=True, reasoning_effort_options=_EFFORT_O_SERIES), + _caps("o3-mini", supports_temperature=False, supports_reasoning=True, reasoning_effort_options=_EFFORT_O_SERIES), + _caps( + "o3-deep-research", + supports_temperature=False, + supports_reasoning=True, + reasoning_effort_options=_EFFORT_O_SERIES, + ), + _caps("o3", supports_temperature=False, supports_reasoning=True, reasoning_effort_options=_EFFORT_O_SERIES), + _caps("o1-pro", supports_temperature=False, supports_reasoning=True, reasoning_effort_options=_EFFORT_O_SERIES), + _caps("o1-mini", supports_temperature=False, supports_reasoning=True, reasoning_effort_options=_EFFORT_O_SERIES), + # o1-preview rejects temperature but doesn't expose the effort parameter. + # Must be matched before the broader "o1" prefix via longest-prefix logic. + _caps("o1-preview", supports_temperature=False, supports_reasoning=False, reasoning_effort_options=None), + _caps("o1", supports_temperature=False, supports_reasoning=True, reasoning_effort_options=_EFFORT_O_SERIES), + _caps( + "codex-mini", + supports_temperature=True, + supports_reasoning=True, + reasoning_effort_options=_EFFORT_O_SERIES, + ), + _caps( + "computer-use-preview", + supports_temperature=True, + supports_reasoning=False, + reasoning_effort_options=None, + ), +) + + +# Suffixes that override family defaults. A model ending in one of these is +# treated as a non-reasoning chat variant regardless of its family. +_CHAT_VARIANT_SUFFIXES: Tuple[str, ...] = ("-chat-latest", "-search-preview") + + +def get_model_capabilities(model: str) -> Optional[ModelCapabilities]: + """Return capability metadata for ``model``, or ``None`` if unknown. + + The lookup is purely string-based: it does not call the OpenAI API. 
That + means it works in offline contexts (tests, build scripts, UIs that need to + decide which controls to render) but is only as fresh as this module's + registry. New model families need a corresponding entry here. + + Args: + model: A model identifier such as ``"gpt-5.4-mini"`` or + ``"gpt-4o-2024-08-06"``. Date suffixes and size variants + (``-mini``, ``-nano``) are handled automatically by longest-prefix + matching. + + Returns: + A :class:`ModelCapabilities` describing the model, or ``None`` if no + registered family matches. Callers should treat ``None`` as + "capability unknown" and fall back to feature-detecting at request + time (i.e. send the parameter and handle a 400 response). + + Example: + >>> get_model_capabilities("gpt-5.4-mini").reasoning_effort_options + ('none', 'minimal', 'low', 'medium', 'high', 'xhigh') + >>> get_model_capabilities("gpt-5-chat-latest").supports_temperature + True + >>> get_model_capabilities("gpt-5").supports_temperature + False + >>> get_model_capabilities("nonexistent-model") is None + True + """ + # Runtime guard: callers may pass arbitrary values from config files, so + # we accept Any at the boundary and reject non-strings explicitly rather + # than relying on the type checker alone. + candidate: Any = model + if not isinstance(candidate, str) or not candidate: + return None + + # Longest matching prefix wins so that "gpt-5.4" beats "gpt-5", and "o1-pro" + # beats "o1". + best: Optional[ModelCapabilities] = None + for entry in _FAMILIES: + if not candidate.startswith(entry.family): + continue + if best is None or len(entry.family) > len(best.family): + best = entry + + if best is None: + return None + + # Chat / search variants override family defaults: gpt-5-chat-latest is a + # non-reasoning model even though gpt-5* normally is one. We still report + # the family so callers can group e.g. "gpt-5.2-chat-latest" with + # "gpt-5.2". 
+ if any(candidate.endswith(suffix) for suffix in _CHAT_VARIANT_SUFFIXES): + return ModelCapabilities( + family=best.family, + supports_temperature=True, + supports_reasoning=False, + reasoning_effort_options=None, + ) + + return best diff --git a/tests/lib/test_model_capabilities.py b/tests/lib/test_model_capabilities.py new file mode 100644 index 0000000000..ed5cfc4de3 --- /dev/null +++ b/tests/lib/test_model_capabilities.py @@ -0,0 +1,259 @@ +from __future__ import annotations + +import pytest + +import openai +from openai.lib._models import ModelCapabilities, get_model_capabilities + + +class TestGpt5Family: + def test_gpt_5_uses_minimal_low_medium_high(self) -> None: + caps = get_model_capabilities("gpt-5") + assert caps is not None + assert caps.family == "gpt-5" + assert caps.supports_reasoning is True + assert caps.supports_temperature is False + assert caps.reasoning_effort_options == ("minimal", "low", "medium", "high") + + def test_gpt_5_size_variants_resolve_to_same_family(self) -> None: + for size in ("gpt-5", "gpt-5-mini", "gpt-5-nano"): + caps = get_model_capabilities(size) + assert caps is not None, size + assert caps.family == "gpt-5" + + def test_gpt_5_dated_snapshot(self) -> None: + caps = get_model_capabilities("gpt-5-mini-2025-08-07") + assert caps is not None + assert caps.family == "gpt-5" + assert caps.supports_reasoning is True + + def test_gpt_5_chat_latest_is_non_reasoning(self) -> None: + caps = get_model_capabilities("gpt-5-chat-latest") + assert caps is not None + # The family is still gpt-5 so callers can group variants together, + # but the capabilities mirror a classic chat model. 
+ assert caps.family == "gpt-5" + assert caps.supports_temperature is True + assert caps.supports_reasoning is False + assert caps.reasoning_effort_options is None + + +class TestGpt51Family: + def test_gpt_5_1_adds_none_to_effort(self) -> None: + caps = get_model_capabilities("gpt-5.1") + assert caps is not None + assert caps.family == "gpt-5.1" + assert caps.reasoning_effort_options == ( + "none", + "minimal", + "low", + "medium", + "high", + ) + + def test_gpt_5_1_codex(self) -> None: + caps = get_model_capabilities("gpt-5.1-codex") + assert caps is not None + assert caps.family == "gpt-5.1" + + def test_gpt_5_2_uses_same_effort_scale(self) -> None: + caps = get_model_capabilities("gpt-5.2-pro") + assert caps is not None + assert caps.family == "gpt-5.2" + assert caps.reasoning_effort_options == ( + "none", + "minimal", + "low", + "medium", + "high", + ) + + +class TestGpt54Family: + def test_gpt_5_4_adds_xhigh(self) -> None: + caps = get_model_capabilities("gpt-5.4") + assert caps is not None + assert caps.family == "gpt-5.4" + assert caps.reasoning_effort_options == ( + "none", + "minimal", + "low", + "medium", + "high", + "xhigh", + ) + + def test_gpt_5_4_size_variants(self) -> None: + for size in ("gpt-5.4", "gpt-5.4-mini", "gpt-5.4-nano"): + caps = get_model_capabilities(size) + assert caps is not None, size + assert caps.family == "gpt-5.4" + assert "xhigh" in (caps.reasoning_effort_options or ()) + + def test_gpt_5_4_dated_snapshot(self) -> None: + caps = get_model_capabilities("gpt-5.4-mini-2026-03-17") + assert caps is not None + assert caps.family == "gpt-5.4" + + +class TestGpt4Family: + def test_gpt_4_1_supports_temperature_no_reasoning(self) -> None: + caps = get_model_capabilities("gpt-4.1") + assert caps is not None + assert caps.family == "gpt-4.1" + assert caps.supports_temperature is True + assert caps.supports_reasoning is False + assert caps.reasoning_effort_options is None + + def test_gpt_4_1_size_variants(self) -> None: + for size in 
("gpt-4.1", "gpt-4.1-mini", "gpt-4.1-nano"): + caps = get_model_capabilities(size) + assert caps is not None, size + assert caps.family == "gpt-4.1" + + def test_gpt_4o_supports_temperature_no_reasoning(self) -> None: + caps = get_model_capabilities("gpt-4o") + assert caps is not None + assert caps.family == "gpt-4o" + assert caps.supports_temperature is True + assert caps.supports_reasoning is False + + def test_gpt_4o_search_preview_is_chat_variant(self) -> None: + caps = get_model_capabilities("gpt-4o-search-preview") + assert caps is not None + assert caps.supports_temperature is True + assert caps.supports_reasoning is False + + def test_gpt_4_turbo(self) -> None: + caps = get_model_capabilities("gpt-4-turbo") + assert caps is not None + assert caps.family == "gpt-4-turbo" + assert caps.supports_temperature is True + + def test_gpt_4_base(self) -> None: + caps = get_model_capabilities("gpt-4") + assert caps is not None + assert caps.family == "gpt-4" + + def test_gpt_3_5_turbo(self) -> None: + caps = get_model_capabilities("gpt-3.5-turbo") + assert caps is not None + assert caps.family == "gpt-3.5" + assert caps.supports_temperature is True + assert caps.supports_reasoning is False + + +class TestOSeriesFamily: + def test_o1_supports_reasoning(self) -> None: + caps = get_model_capabilities("o1") + assert caps is not None + assert caps.supports_reasoning is True + assert caps.supports_temperature is False + assert caps.reasoning_effort_options == ("low", "medium", "high") + + def test_o1_preview_does_not_expose_effort(self) -> None: + # o1-preview rejects temperature but doesn't expose the effort + # parameter. It must be matched before the broader "o1" prefix. 
+ caps = get_model_capabilities("o1-preview") + assert caps is not None + assert caps.supports_reasoning is False + assert caps.supports_temperature is False + + def test_o1_mini_uses_o_series_effort_scale(self) -> None: + caps = get_model_capabilities("o1-mini") + assert caps is not None + assert caps.reasoning_effort_options == ("low", "medium", "high") + + def test_o3_pro_matches_before_o3(self) -> None: + caps = get_model_capabilities("o3-pro") + assert caps is not None + assert caps.family == "o3-pro" + + def test_o3_dated(self) -> None: + caps = get_model_capabilities("o3-2025-04-16") + assert caps is not None + assert caps.family == "o3" + assert caps.supports_reasoning is True + + def test_o4_mini(self) -> None: + caps = get_model_capabilities("o4-mini") + assert caps is not None + assert caps.supports_reasoning is True + + +class TestUnknownAndEdgeCases: + def test_unknown_model_returns_none(self) -> None: + assert get_model_capabilities("nonexistent-model") is None + + def test_empty_string_returns_none(self) -> None: + assert get_model_capabilities("") is None + + @pytest.mark.parametrize("bad_input", [None, 123, [], {}]) + def test_non_string_input_returns_none(self, bad_input: object) -> None: + # Runtime guard: callers may pass arbitrary values from config files. + assert get_model_capabilities(bad_input) is None # type: ignore[arg-type] + + def test_returns_model_capabilities_instance(self) -> None: + caps = get_model_capabilities("gpt-5") + assert isinstance(caps, ModelCapabilities) + + def test_capabilities_are_frozen(self) -> None: + from dataclasses import FrozenInstanceError + + caps = get_model_capabilities("gpt-5") + assert caps is not None + with pytest.raises(FrozenInstanceError): + caps.family = "mutated" # type: ignore[misc] + + +class TestExports: + def test_top_level_export(self) -> None: + # Documented public surface: importable directly from `openai`. 
+ assert openai.get_model_capabilities is get_model_capabilities + assert openai.ModelCapabilities is ModelCapabilities + + def test_importable_from_openai_lib(self) -> None: + from openai.lib import ( + ModelCapabilities as LibCaps, + get_model_capabilities as lib_func, + ) + + assert LibCaps is ModelCapabilities + assert lib_func is get_model_capabilities + + +class TestRealisticUsage: + """The use case from the issue: dispatching parameters by model.""" + + def test_can_route_temperature_decision(self) -> None: + def should_send_temperature(model: str) -> bool: + caps = get_model_capabilities(model) + return caps.supports_temperature if caps else True # default permissive + + assert should_send_temperature("gpt-4o") is True + assert should_send_temperature("gpt-4.1-mini") is True + assert should_send_temperature("gpt-5") is False + assert should_send_temperature("gpt-5.4-nano") is False + assert should_send_temperature("o3-mini") is False + # Chat variants accept temperature even within reasoning families + assert should_send_temperature("gpt-5-chat-latest") is True + + def test_can_build_effort_dropdown(self) -> None: + def effort_options(model: str) -> tuple[str, ...]: + caps = get_model_capabilities(model) + if caps is None or caps.reasoning_effort_options is None: + return () + # Filter Nones (since ReasoningEffort is Optional[Literal[...]]) + return tuple(opt for opt in caps.reasoning_effort_options if opt is not None) + + assert effort_options("gpt-5") == ("minimal", "low", "medium", "high") + assert effort_options("gpt-5.4") == ( + "none", + "minimal", + "low", + "medium", + "high", + "xhigh", + ) + assert effort_options("gpt-4.1") == () + assert effort_options("o3") == ("low", "medium", "high") From 2171fb7cc575a66b8606981373fff8aeb83dfd51 Mon Sep 17 00:00:00 2001 From: SinhSinh An Date: Sat, 25 Apr 2026 11:42:38 -0500 Subject: [PATCH 2/2] fix(lib): address Codex review feedback on model capabilities Two P1 corrections from the automated Codex review on 
PR #3124: 1. Enforce segment boundary in prefix matching. `startswith()` was matching unknown identifiers like `gpt-5.10` as `gpt-5.1` and `o1-previewed` as `o1-preview`, causing the documented `None` fallback to be skipped. The lookup now requires the family prefix to either equal the model exactly or be followed by a `-`. 2. Align gpt-5 family effort options with the canonical SDK docs in `src/openai/types/shared/reasoning.py`: - gpt-5.1 (and -codex / -codex-max / -mini): `none/low/medium/high` -- removed `minimal`, which would have surfaced an unsupported value and triggered avoidable 400 responses. - gpt-5.2 / gpt-5.3 / gpt-5.4: `none/low/medium/high/xhigh` (`xhigh` is supported for models *after* gpt-5.1-codex-max). - gpt-5-pro: split into its own family with the high-only effort scale per the SDK docstring ("defaults to and only supports high"). Other improvements: - Chat / search variant detection now matches dated snapshots like `gpt-4o-search-preview-2025-03-11` via a regex instead of a strict `endswith` check. - New `TestBoundaryMatching` class with parametrized regression tests for prefix-collision cases (`gpt-5.10`, `gpt-5.1foo`, `o1-previewed`) and dated chat / search variants. - New `TestGpt5ProFamily` class covering the high-only effort scale. 49 of 49 new tests pass; 154 of 154 lib tests pass overall. --- src/openai/lib/_models.py | 89 ++++++++++++----- tests/lib/test_model_capabilities.py | 138 ++++++++++++++++++++++----- 2 files changed, 179 insertions(+), 48 deletions(-) diff --git a/src/openai/lib/_models.py b/src/openai/lib/_models.py index 5aea2ec2ec..0aa679d0a3 100644 --- a/src/openai/lib/_models.py +++ b/src/openai/lib/_models.py @@ -4,19 +4,23 @@ capability matrix is documented behaviour rather than schema. When OpenAI ships a new model family, the registry in this file should be updated to match. 
+The canonical source for reasoning-effort behaviour is the docstring on the +``Reasoning.effort`` parameter in ``src/openai/types/shared/reasoning.py``. + Example: >>> from openai import get_model_capabilities >>> caps = get_model_capabilities("gpt-5.4-mini") >>> caps.supports_reasoning True >>> caps.reasoning_effort_options - ('none', 'minimal', 'low', 'medium', 'high', 'xhigh') + ('none', 'low', 'medium', 'high', 'xhigh') >>> get_model_capabilities("gpt-4.1").supports_reasoning False """ from __future__ import annotations +import re from typing import Any, Tuple, Optional from dataclasses import dataclass @@ -81,28 +85,43 @@ def _caps( # --------------------------------------------------------------------------- # Family registry. # -# Entries are matched by longest-prefix against the model string, with chat / -# search variants checked via the suffix test in `get_model_capabilities`. +# Entries are matched by longest *segment* prefix against the model string +# (i.e. the registered prefix must either equal the model exactly or be +# followed by a `-`), with chat / search variants checked via the suffix test +# in `get_model_capabilities`. # # When OpenAI ships a new family, add an entry here. Order within this tuple # does not matter; the lookup picks the longest matching prefix. # --------------------------------------------------------------------------- -# Effort scales reused across families. +# Effort scales reused across families. These mirror the prose in +# `src/openai/types/shared/reasoning.py`: +# +# - All models *before* gpt-5.1 default to medium and do NOT support `none`. +# gpt-5 base accepts `minimal/low/medium/high`. +# - gpt-5.1 supports `none/low/medium/high` (no `minimal`). +# - `xhigh` is supported for models *after* gpt-5.1-codex-max, i.e. +# gpt-5.2 onward, on top of the gpt-5.1 effort scale. +# - gpt-5-pro defaults to and only supports `high`. _EFFORT_O_SERIES: Tuple[ReasoningEffort, ...] 
= ("low", "medium", "high") -_EFFORT_GPT5: Tuple[ReasoningEffort, ...] = ("minimal", "low", "medium", "high") -_EFFORT_GPT5_1: Tuple[ReasoningEffort, ...] = ("none", "minimal", "low", "medium", "high") -_EFFORT_GPT5_4: Tuple[ReasoningEffort, ...] = ("none", "minimal", "low", "medium", "high", "xhigh") +_EFFORT_GPT5_BASE: Tuple[ReasoningEffort, ...] = ("minimal", "low", "medium", "high") +_EFFORT_GPT5_1: Tuple[ReasoningEffort, ...] = ("none", "low", "medium", "high") +_EFFORT_GPT5_2_PLUS: Tuple[ReasoningEffort, ...] = ("none", "low", "medium", "high", "xhigh") +_EFFORT_GPT5_PRO: Tuple[ReasoningEffort, ...] = ("high",) _FAMILIES: Tuple[ModelCapabilities, ...] = ( # gpt-5.x reasoning models. Temperature is rejected unless you use a # `-chat-latest` variant or set `reasoning_effort="none"`. - _caps("gpt-5.4", supports_temperature=False, supports_reasoning=True, reasoning_effort_options=_EFFORT_GPT5_4), - _caps("gpt-5.3", supports_temperature=False, supports_reasoning=True, reasoning_effort_options=_EFFORT_GPT5_1), - _caps("gpt-5.2", supports_temperature=False, supports_reasoning=True, reasoning_effort_options=_EFFORT_GPT5_1), + # + # gpt-5-pro is a high-only reasoning model and must be registered as its + # own family so longest-prefix matching beats the generic `gpt-5`. 
+ _caps("gpt-5-pro", supports_temperature=False, supports_reasoning=True, reasoning_effort_options=_EFFORT_GPT5_PRO), + _caps("gpt-5.4", supports_temperature=False, supports_reasoning=True, reasoning_effort_options=_EFFORT_GPT5_2_PLUS), + _caps("gpt-5.3", supports_temperature=False, supports_reasoning=True, reasoning_effort_options=_EFFORT_GPT5_2_PLUS), + _caps("gpt-5.2", supports_temperature=False, supports_reasoning=True, reasoning_effort_options=_EFFORT_GPT5_2_PLUS), _caps("gpt-5.1", supports_temperature=False, supports_reasoning=True, reasoning_effort_options=_EFFORT_GPT5_1), - _caps("gpt-5", supports_temperature=False, supports_reasoning=True, reasoning_effort_options=_EFFORT_GPT5), + _caps("gpt-5", supports_temperature=False, supports_reasoning=True, reasoning_effort_options=_EFFORT_GPT5_BASE), # Classic chat families. _caps("gpt-4.1", supports_temperature=True, supports_reasoning=False, reasoning_effort_options=None), _caps("gpt-4o", supports_temperature=True, supports_reasoning=False, reasoning_effort_options=None), @@ -142,9 +161,22 @@ def _caps( ) -# Suffixes that override family defaults. A model ending in one of these is -# treated as a non-reasoning chat variant regardless of its family. -_CHAT_VARIANT_SUFFIXES: Tuple[str, ...] = ("-chat-latest", "-search-preview") +# Matches `*-chat-latest` and `*-search-preview` (with an optional trailing +# `-YYYY-MM-DD` snapshot date), e.g. `gpt-4o-search-preview-2025-03-11`. +# These variants behave like classic chat models regardless of family. +_CHAT_VARIANT_RE = re.compile(r"-(?:chat-latest|search-preview)(?:-\d{4}-\d{2}-\d{2})?$") + + +def _matches_family(model: str, family: str) -> bool: + """Match ``model`` against a family prefix at a segment boundary. + + A model matches when it equals the family exactly or extends it with a + ``-`` separator. This prevents collisions like ``gpt-5.10`` being + misclassified as ``gpt-5.1``. 
+ """ + if model == family: + return True + return model.startswith(family + "-") def get_model_capabilities(model: str) -> Optional[ModelCapabilities]: @@ -155,11 +187,17 @@ def get_model_capabilities(model: str) -> Optional[ModelCapabilities]: decide which controls to render) but is only as fresh as this module's registry. New model families need a corresponding entry here. + Matching is segment-aware: the registered prefix must either equal the + model exactly or be followed by a ``-`` separator. ``"gpt-5.10"`` will + therefore *not* match the ``gpt-5.1`` family and ``"o1-previewed"`` will + not match ``o1-preview``; both fall through to ``None`` so callers treat + them as unknown. + Args: model: A model identifier such as ``"gpt-5.4-mini"`` or ``"gpt-4o-2024-08-06"``. Date suffixes and size variants - (``-mini``, ``-nano``) are handled automatically by longest-prefix - matching. + (``-mini``, ``-nano``, ``-pro``) are handled automatically by + longest-prefix matching. Returns: A :class:`ModelCapabilities` describing the model, or ``None`` if no @@ -169,11 +207,15 @@ def get_model_capabilities(model: str) -> Optional[ModelCapabilities]: Example: >>> get_model_capabilities("gpt-5.4-mini").reasoning_effort_options - ('none', 'minimal', 'low', 'medium', 'high', 'xhigh') + ('none', 'low', 'medium', 'high', 'xhigh') >>> get_model_capabilities("gpt-5-chat-latest").supports_temperature True >>> get_model_capabilities("gpt-5").supports_temperature False + >>> get_model_capabilities("gpt-5-pro").reasoning_effort_options + ('high',) + >>> get_model_capabilities("gpt-5.10") is None + True >>> get_model_capabilities("nonexistent-model") is None True """ @@ -184,11 +226,12 @@ def get_model_capabilities(model: str) -> Optional[ModelCapabilities]: if not isinstance(candidate, str) or not candidate: return None - # Longest matching prefix wins so that "gpt-5.4" beats "gpt-5", and "o1-pro" - # beats "o1". 
+ # Longest matching family wins so that "gpt-5.4" beats "gpt-5", and + # "o1-pro" beats "o1". Segment boundary check rejects things like + # "gpt-5.10" claiming to be "gpt-5.1". best: Optional[ModelCapabilities] = None for entry in _FAMILIES: - if not candidate.startswith(entry.family): + if not _matches_family(candidate, entry.family): continue if best is None or len(entry.family) > len(best.family): best = entry @@ -199,8 +242,10 @@ def get_model_capabilities(model: str) -> Optional[ModelCapabilities]: # Chat / search variants override family defaults: gpt-5-chat-latest is a # non-reasoning model even though gpt-5* normally is one. We still report # the family so callers can group e.g. "gpt-5.2-chat-latest" with - # "gpt-5.2". - if any(candidate.endswith(suffix) for suffix in _CHAT_VARIANT_SUFFIXES): + # "gpt-5.2". The regex tolerates a trailing date snapshot like + # `-2025-03-11` so dated variants like `gpt-4o-search-preview-2025-03-11` + # are recognized too. + if _CHAT_VARIANT_RE.search(candidate): return ModelCapabilities( family=best.family, supports_temperature=True, diff --git a/tests/lib/test_model_capabilities.py b/tests/lib/test_model_capabilities.py index ed5cfc4de3..24f12e07e6 100644 --- a/tests/lib/test_model_capabilities.py +++ b/tests/lib/test_model_capabilities.py @@ -39,49 +39,69 @@ def test_gpt_5_chat_latest_is_non_reasoning(self) -> None: class TestGpt51Family: - def test_gpt_5_1_adds_none_to_effort(self) -> None: + def test_gpt_5_1_drops_minimal_adds_none(self) -> None: + # Per src/openai/types/shared/reasoning.py the supported gpt-5.1 + # effort values are none/low/medium/high. `minimal` was removed and + # would trigger 400 responses if surfaced in a UI. 
caps = get_model_capabilities("gpt-5.1") assert caps is not None assert caps.family == "gpt-5.1" - assert caps.reasoning_effort_options == ( - "none", - "minimal", - "low", - "medium", - "high", - ) + assert caps.reasoning_effort_options == ("none", "low", "medium", "high") + assert "minimal" not in (caps.reasoning_effort_options or ()) def test_gpt_5_1_codex(self) -> None: caps = get_model_capabilities("gpt-5.1-codex") assert caps is not None assert caps.family == "gpt-5.1" - def test_gpt_5_2_uses_same_effort_scale(self) -> None: + def test_gpt_5_1_codex_max(self) -> None: + # gpt-5.1-codex-max is the boundary referenced in the SDK docs: + # `xhigh` is supported for models *after* this one. So it should + # still match the gpt-5.1 family (no xhigh). + caps = get_model_capabilities("gpt-5.1-codex-max") + assert caps is not None + assert caps.family == "gpt-5.1" + assert "xhigh" not in (caps.reasoning_effort_options or ()) + + +class TestGpt52PlusFamilies: + def test_gpt_5_2_supports_xhigh(self) -> None: + # Per the SDK docstring, `xhigh` is supported for all models *after* + # gpt-5.1-codex-max, which means gpt-5.2 onward. + caps = get_model_capabilities("gpt-5.2") + assert caps is not None + assert caps.family == "gpt-5.2" + assert caps.reasoning_effort_options == ("none", "low", "medium", "high", "xhigh") + + def test_gpt_5_2_pro(self) -> None: caps = get_model_capabilities("gpt-5.2-pro") assert caps is not None assert caps.family == "gpt-5.2" - assert caps.reasoning_effort_options == ( - "none", - "minimal", - "low", - "medium", - "high", - ) + + +class TestGpt5ProFamily: + def test_gpt_5_pro_only_supports_high(self) -> None: + # Per the SDK docs, gpt-5-pro defaults to (and only supports) `high`. 
+ caps = get_model_capabilities("gpt-5-pro") + assert caps is not None + assert caps.family == "gpt-5-pro" + assert caps.supports_reasoning is True + assert caps.reasoning_effort_options == ("high",) + + def test_gpt_5_pro_dated_snapshot(self) -> None: + caps = get_model_capabilities("gpt-5-pro-2025-10-06") + assert caps is not None + assert caps.family == "gpt-5-pro" + assert caps.reasoning_effort_options == ("high",) class TestGpt54Family: - def test_gpt_5_4_adds_xhigh(self) -> None: + def test_gpt_5_4_full_effort_scale(self) -> None: caps = get_model_capabilities("gpt-5.4") assert caps is not None assert caps.family == "gpt-5.4" - assert caps.reasoning_effort_options == ( - "none", - "minimal", - "low", - "medium", - "high", - "xhigh", - ) + assert caps.reasoning_effort_options == ("none", "low", "medium", "high", "xhigh") + assert "minimal" not in (caps.reasoning_effort_options or ()) def test_gpt_5_4_size_variants(self) -> None: for size in ("gpt-5.4", "gpt-5.4-mini", "gpt-5.4-nano"): @@ -181,6 +201,71 @@ def test_o4_mini(self) -> None: assert caps.supports_reasoning is True +class TestBoundaryMatching: + """Family lookup must respect segment boundaries. + + A registered prefix should only match the model when the prefix either + equals the model exactly or is followed by ``-``. Without this, a future + model name like ``gpt-5.10`` would silently impersonate ``gpt-5.1``. + """ + + @pytest.mark.parametrize( + "model", + [ + "gpt-5.10", # version typo / future model: must NOT match gpt-5.1 + "gpt-5.10-mini", + "gpt-5.1foo", + "o1-previewed", # near-typo of o1-preview + "o1-previews", + ], + ) + def test_boundary_collisions_do_not_misclassify(self, model: str) -> None: + # Either we return None (no match) or we match a strictly different + # family (e.g. `gpt-5.10` could match `gpt-5` segment-wise -- in that + # case we want it to NOT claim to be gpt-5.1). 
+ caps = get_model_capabilities(model) + if caps is not None: + assert caps.family != "gpt-5.1", model + assert caps.family != "o1-preview", model + + def test_gpt_5_10_does_not_match_gpt_5_1(self) -> None: + # `gpt-5.10` is hypothetical, but the lookup must not classify it as + # gpt-5.1 just because of `startswith`. Today we'd expect None until + # a 5.10 family is registered. + assert get_model_capabilities("gpt-5.10") is None + + def test_o1_previewed_does_not_match_o1_preview(self) -> None: + # Without the boundary check, `o1-previewed` would be tagged as + # o1-preview (which has supports_reasoning=False), masking what is + # really an unknown model. + caps = get_model_capabilities("o1-previewed") + if caps is not None: + assert caps.family != "o1-preview" + + def test_exact_match_still_works(self) -> None: + # Sanity: removing collisions must not break the exact-match path. + for model in ("gpt-5.1", "o1-preview", "chatgpt-4o-latest", "gpt-5-pro"): + assert get_model_capabilities(model) is not None, model + + def test_dated_search_preview_still_recognized_as_chat_variant(self) -> None: + # `gpt-4o-search-preview-2025-03-11` should still count as a chat + # variant despite the trailing date snapshot. The previous suffix + # check (`endswith("-search-preview")`) silently missed dated + # models.
+ caps = get_model_capabilities("gpt-4o-search-preview-2025-03-11") + assert caps is not None + assert caps.supports_temperature is True + assert caps.supports_reasoning is False + + def test_dated_chat_latest_still_recognized_as_chat_variant(self) -> None: + caps = get_model_capabilities("gpt-5.2-chat-latest-2025-12-11") + assert caps is not None + assert caps.supports_temperature is True + assert caps.supports_reasoning is False + # Family should still be reported as the underlying gpt-5.2 + assert caps.family == "gpt-5.2" + + class TestUnknownAndEdgeCases: def test_unknown_model_returns_none(self) -> None: assert get_model_capabilities("nonexistent-model") is None @@ -247,13 +332,14 @@ def effort_options(model: str) -> tuple[str, ...]: return tuple(opt for opt in caps.reasoning_effort_options if opt is not None) assert effort_options("gpt-5") == ("minimal", "low", "medium", "high") + assert effort_options("gpt-5.1") == ("none", "low", "medium", "high") assert effort_options("gpt-5.4") == ( "none", - "minimal", "low", "medium", "high", "xhigh", ) + assert effort_options("gpt-5-pro") == ("high",) assert effort_options("gpt-4.1") == () assert effort_options("o3") == ("low", "medium", "high")