From 2be94ca479e1a46a7ee053f0f6e6d733093a463e Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Wed, 15 Apr 2026 13:14:52 +0200 Subject: [PATCH 01/84] feat: Send GenAI spans as V2 envelope items --- sentry_sdk/client.py | 105 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 103 insertions(+), 2 deletions(-) diff --git a/sentry_sdk/client.py b/sentry_sdk/client.py index 9f795d2489..ed58104ec7 100644 --- a/sentry_sdk/client.py +++ b/sentry_sdk/client.py @@ -27,6 +27,7 @@ get_before_send_metric, has_logs_enabled, has_metrics_enabled, + serialize_attribute, ) from sentry_sdk.serializer import serialize from sentry_sdk.tracing import trace @@ -56,6 +57,74 @@ ) from sentry_sdk.scrubber import EventScrubber from sentry_sdk.monitor import Monitor +from sentry_sdk.envelope import Item, PayloadRef + + +_ISO_TIMESTAMP_FORMAT = "%Y-%m-%dT%H:%M:%S.%fZ" + + +def _iso_to_epoch(iso_str: str) -> float: + return ( + datetime.strptime(iso_str, _ISO_TIMESTAMP_FORMAT) + .replace(tzinfo=timezone.utc) + .timestamp() + ) + + +def _v1_span_to_v2(span: "Dict[str, Any]", event: "Dict[str, Any]") -> "Dict[str, Any]": + rv: "Dict[str, Any]" = { + "trace_id": span["trace_id"], + "span_id": span["span_id"], + "name": span.get("description") or "", + "is_segment": False, + "start_timestamp": _iso_to_epoch(span["start_timestamp"]), + "status": "ok", + } + + if span.get("timestamp"): + rv["end_timestamp"] = _iso_to_epoch(span["timestamp"]) + + if span.get("parent_span_id"): + rv["parent_span_id"] = span["parent_span_id"] + + status = span.get("status") + if status and status != "ok": + rv["status"] = "error" + + attributes: "Dict[str, Any]" = {} + + if span.get("op"): + attributes["sentry.op"] = span["op"] + if span.get("origin"): + attributes["sentry.origin"] = span["origin"] + + for key, value in (span.get("data") or {}).items(): + attributes[key] = value + for key, value in (span.get("tags") or {}).items(): + attributes[key] = value + + trace_context = event.get("contexts", 
{}).get("trace", {}) + sdk_info = event.get("sdk", {}) + + if event.get("release"): + attributes["sentry.release"] = event["release"] + if event.get("environment"): + attributes["sentry.environment"] = event["environment"] + if event.get("transaction"): + attributes["sentry.segment.name"] = event["transaction"] + + if trace_context.get("span_id"): + attributes["sentry.segment.id"] = trace_context["span_id"] + if sdk_info.get("name"): + attributes["sentry.sdk.name"] = sdk_info["name"] + if sdk_info.get("version"): + attributes["sentry.sdk.version"] = sdk_info["version"] + + if attributes: + rv["attributes"] = {k: serialize_attribute(v) for k, v in attributes.items()} + + return rv + if TYPE_CHECKING: from typing import Any @@ -72,7 +141,7 @@ from sentry_sdk.session import Session from sentry_sdk.spotlight import SpotlightClient from sentry_sdk.traces import StreamedSpan - from sentry_sdk.transport import Transport, Item + from sentry_sdk.transport import Transport, Item, PayloadRef from sentry_sdk._log_batcher import LogBatcher from sentry_sdk._metrics_batcher import MetricsBatcher from sentry_sdk.utils import Dsn @@ -912,7 +981,39 @@ def capture_event( if is_transaction: if isinstance(profile, Profile): envelope.add_profile(profile.to_json(event_opt, self.options)) - envelope.add_transaction(event_opt) + + nonstreamed_spans = [] + streamed_spans = [] + for span in event_opt.get("spans") or []: + span_op = span.get("op") + if span_op is not None and span_op.startswith("gen_ai."): + streamed_spans.append(span) + else: + nonstreamed_spans.append(span) + + if nonstreamed_spans: + event_opt["spans"] = nonstreamed_spans + envelope.add_transaction(event_opt) + + if streamed_spans: + envelope.add_item( + Item( + type=SpanBatcher.TYPE, + content_type=SpanBatcher.CONTENT_TYPE, + headers={ + "item_count": len(streamed_spans), + }, + payload=PayloadRef( + json={ + "items": [ + _v1_span_to_v2(span, event) + for span in streamed_spans + ] + }, + ), + ) + ) + elif is_checkin: 
envelope.add_checkin(event_opt) else: From 01f479a09e4791082da604ba0f57cc4b74f1bf2f Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Wed, 15 Apr 2026 15:42:59 +0200 Subject: [PATCH 02/84] . --- sentry_sdk/client.py | 213 ++++++++++++++++++++++++++----------------- 1 file changed, 130 insertions(+), 83 deletions(-) diff --git a/sentry_sdk/client.py b/sentry_sdk/client.py index ed58104ec7..8667c2b194 100644 --- a/sentry_sdk/client.py +++ b/sentry_sdk/client.py @@ -2,7 +2,7 @@ import uuid import random import socket -from collections.abc import Mapping +from collections.abc import Mapping, Iterable from datetime import datetime, timezone from importlib import import_module from typing import TYPE_CHECKING, List, Dict, cast, overload @@ -58,104 +58,156 @@ from sentry_sdk.scrubber import EventScrubber from sentry_sdk.monitor import Monitor from sentry_sdk.envelope import Item, PayloadRef +from sentry_sdk.utils import datetime_from_isoformat +if TYPE_CHECKING: + from typing import Any + from typing import Callable + from typing import Optional + from typing import Sequence + from typing import Type + from typing import Union + from typing import TypeVar + + from sentry_sdk._types import Event, Hint, SDKInfo, Log, Metric, EventDataCategory + from sentry_sdk.integrations import Integration + from sentry_sdk.scope import Scope + from sentry_sdk.session import Session + from sentry_sdk.spotlight import SpotlightClient + from sentry_sdk.traces import StreamedSpan + from sentry_sdk.transport import Transport, Item, PayloadRef + from sentry_sdk._log_batcher import LogBatcher + from sentry_sdk._metrics_batcher import MetricsBatcher + from sentry_sdk.utils import Dsn -_ISO_TIMESTAMP_FORMAT = "%Y-%m-%dT%H:%M:%S.%fZ" + I = TypeVar("I", bound=Integration) # noqa: E741 +_client_init_debug = ContextVar("client_init_debug") -def _iso_to_epoch(iso_str: str) -> float: - return ( - datetime.strptime(iso_str, _ISO_TIMESTAMP_FORMAT) - .replace(tzinfo=timezone.utc) - .timestamp() 
- ) +SDK_INFO: "SDKInfo" = { + "name": "sentry.python", # SDK name will be overridden after integrations have been loaded with sentry_sdk.integrations.setup_integrations() + "version": VERSION, + "packages": [{"name": "pypi:sentry-sdk", "version": VERSION}], +} -def _v1_span_to_v2(span: "Dict[str, Any]", event: "Dict[str, Any]") -> "Dict[str, Any]": - rv: "Dict[str, Any]" = { - "trace_id": span["trace_id"], - "span_id": span["span_id"], - "name": span.get("description") or "", - "is_segment": False, - "start_timestamp": _iso_to_epoch(span["start_timestamp"]), +def _serialized_v1_span_to_serialized_v2_span( + span: "Dict[str, Any]", event: "Event" +) -> "dict[str, Any]": + # See SpanBatcher._to_transport_format() for analogous population of all entries except "attributes". + res: "Dict[str, Any]" = { "status": "ok", + "is_segment": False, } - if span.get("timestamp"): - rv["end_timestamp"] = _iso_to_epoch(span["timestamp"]) + if "trace_id" in span: + res["trace_id"] = span["trace_id"] + + if "span_id" in span: + res["span_id"] = span["span_id"] + + if "description" in span: + res["name"] = span["description"] - if span.get("parent_span_id"): - rv["parent_span_id"] = span["parent_span_id"] + if "start_timestamp" in span: + start_timestamp = None + try: + start_timestamp = datetime_from_isoformat(span["start_timestamp"]) + except Exception: + pass + + if start_timestamp is not None: + res["start_timestamp"] = start_timestamp.timestamp() + + if "timestamp" in span: + end_timestamp = None + try: + end_timestamp = datetime_from_isoformat(span["timestamp"]) + except Exception: + pass - status = span.get("status") - if status and status != "ok": - rv["status"] = "error" + if end_timestamp is not None: + res["end_timestamp"] = end_timestamp.timestamp() + + if "parent_span_id" in span: + res["parent_span_id"] = span["parent_span_id"] + + if "status" in span and span["status"] != "ok": + res["status"] = "error" attributes: "Dict[str, Any]" = {} - if span.get("op"): + if "op" 
in span: attributes["sentry.op"] = span["op"] - if span.get("origin"): + if "origin" in span: attributes["sentry.origin"] = span["origin"] - for key, value in (span.get("data") or {}).items(): - attributes[key] = value - for key, value in (span.get("tags") or {}).items(): - attributes[key] = value - - trace_context = event.get("contexts", {}).get("trace", {}) - sdk_info = event.get("sdk", {}) - - if event.get("release"): + span_data = span.get("data") + if isinstance(span_data, dict): + attributes.update(span_data) + + span_tags = span.get("tags") + if isinstance(span_tags, dict): + attributes.update(span_tags) + + # See Scope._apply_user_attributes_to_telemetry() for user attributes. + user = event.get("user") + if isinstance(user, dict): + if "id" in user: + attributes["user.id"] = user["id"] + if "username" in user: + attributes["user.name"] = user["username"] + if "email" in user: + attributes["user.email"] = user["email"] + + # See Scope.set_global_attributes() for release, environment, and SDK metadata. 
+ if "release" in event: attributes["sentry.release"] = event["release"] - if event.get("environment"): + if "environment" in event: attributes["sentry.environment"] = event["environment"] - if event.get("transaction"): + if "transaction" in event: attributes["sentry.segment.name"] = event["transaction"] - if trace_context.get("span_id"): + trace_context = event.get("contexts", {}).get("trace", {}) + if "span_id" in trace_context: attributes["sentry.segment.id"] = trace_context["span_id"] - if sdk_info.get("name"): - attributes["sentry.sdk.name"] = sdk_info["name"] - if sdk_info.get("version"): - attributes["sentry.sdk.version"] = sdk_info["version"] + + sdk_info = event.get("sdk") + if isinstance(sdk_info, dict): + if "name" in sdk_info: + attributes["sentry.sdk.name"] = sdk_info["name"] + if "version" in sdk_info: + attributes["sentry.sdk.version"] = sdk_info["version"] if attributes: - rv["attributes"] = {k: serialize_attribute(v) for k, v in attributes.items()} + res["attributes"] = {k: serialize_attribute(v) for k, v in attributes.items()} - return rv + return res -if TYPE_CHECKING: - from typing import Any - from typing import Callable - from typing import Optional - from typing import Sequence - from typing import Type - from typing import Union - from typing import TypeVar +def _split_gen_ai_spans( + event_opt: "Event", +) -> "tuple[List[Dict[str, object]], List[Dict[str, object]]]": + if "spans" not in event_opt: + return [], [] - from sentry_sdk._types import Event, Hint, SDKInfo, Log, Metric, EventDataCategory - from sentry_sdk.integrations import Integration - from sentry_sdk.scope import Scope - from sentry_sdk.session import Session - from sentry_sdk.spotlight import SpotlightClient - from sentry_sdk.traces import StreamedSpan - from sentry_sdk.transport import Transport, Item, PayloadRef - from sentry_sdk._log_batcher import LogBatcher - from sentry_sdk._metrics_batcher import MetricsBatcher - from sentry_sdk.utils import Dsn + spans = 
event_opt["spans"] + if isinstance(spans, AnnotatedValue): + spans = spans.value - I = TypeVar("I", bound=Integration) # noqa: E741 - -_client_init_debug = ContextVar("client_init_debug") + if not isinstance(spans, Iterable): + return [], [] + non_gen_ai_spans = [] + gen_ai_spans = [] + for span in spans: + span_op = span.get("op") + if isinstance(span_op, str) and span_op.startswith("gen_ai."): + gen_ai_spans.append(span) + else: + non_gen_ai_spans.append(span) -SDK_INFO: "SDKInfo" = { - "name": "sentry.python", # SDK name will be overridden after integrations have been loaded with sentry_sdk.integrations.setup_integrations() - "version": VERSION, - "packages": [{"name": "pypi:sentry-sdk", "version": VERSION}], -} + return non_gen_ai_spans, gen_ai_spans def _get_options(*args: "Optional[str]", **kwargs: "Any") -> "Dict[str, Any]": @@ -982,32 +1034,27 @@ def capture_event( if isinstance(profile, Profile): envelope.add_profile(profile.to_json(event_opt, self.options)) - nonstreamed_spans = [] - streamed_spans = [] - for span in event_opt.get("spans") or []: - span_op = span.get("op") - if span_op is not None and span_op.startswith("gen_ai."): - streamed_spans.append(span) - else: - nonstreamed_spans.append(span) + non_gen_ai_spans, gen_ai_spans = _split_gen_ai_spans(event_opt) - if nonstreamed_spans: - event_opt["spans"] = nonstreamed_spans - envelope.add_transaction(event_opt) + event_opt["spans"] = non_gen_ai_spans + envelope.add_transaction(event_opt) - if streamed_spans: + if gen_ai_spans: envelope.add_item( Item( type=SpanBatcher.TYPE, content_type=SpanBatcher.CONTENT_TYPE, headers={ - "item_count": len(streamed_spans), + "item_count": len(gen_ai_spans), }, payload=PayloadRef( json={ "items": [ - _v1_span_to_v2(span, event) - for span in streamed_spans + _serialized_v1_span_to_serialized_v2_span( + span, event + ) + for span in gen_ai_spans + if isinstance(span, dict) ] }, ), From 80e6a106b8472f6a6984ab254ca56646f0d51e59 Mon Sep 17 00:00:00 2001 From: Alexander 
Alderman Webb Date: Wed, 15 Apr 2026 15:43:59 +0200 Subject: [PATCH 03/84] . --- sentry_sdk/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sentry_sdk/client.py b/sentry_sdk/client.py index 8667c2b194..41ab81c58e 100644 --- a/sentry_sdk/client.py +++ b/sentry_sdk/client.py @@ -75,7 +75,7 @@ from sentry_sdk.session import Session from sentry_sdk.spotlight import SpotlightClient from sentry_sdk.traces import StreamedSpan - from sentry_sdk.transport import Transport, Item, PayloadRef + from sentry_sdk.transport import Transport, Item from sentry_sdk._log_batcher import LogBatcher from sentry_sdk._metrics_batcher import MetricsBatcher from sentry_sdk.utils import Dsn From 0622cf410d9c6496d81d50ce163f52fa1d97eaee Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Wed, 15 Apr 2026 15:44:35 +0200 Subject: [PATCH 04/84] . --- sentry_sdk/client.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sentry_sdk/client.py b/sentry_sdk/client.py index 41ab81c58e..2895f23436 100644 --- a/sentry_sdk/client.py +++ b/sentry_sdk/client.py @@ -84,6 +84,7 @@ _client_init_debug = ContextVar("client_init_debug") + SDK_INFO: "SDKInfo" = { "name": "sentry.python", # SDK name will be overridden after integrations have been loaded with sentry_sdk.integrations.setup_integrations() "version": VERSION, From 7c75da105649abe57a6e32946507d97c85c86123 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Wed, 15 Apr 2026 16:01:06 +0200 Subject: [PATCH 05/84] . 
--- sentry_sdk/client.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/sentry_sdk/client.py b/sentry_sdk/client.py index 2895f23436..7bb2acf7dc 100644 --- a/sentry_sdk/client.py +++ b/sentry_sdk/client.py @@ -188,16 +188,16 @@ def _serialized_v1_span_to_serialized_v2_span( def _split_gen_ai_spans( event_opt: "Event", -) -> "tuple[List[Dict[str, object]], List[Dict[str, object]]]": +) -> "Optional[tuple[List[Dict[str, object]], List[Dict[str, object]]]]": if "spans" not in event_opt: - return [], [] + return None spans = event_opt["spans"] if isinstance(spans, AnnotatedValue): spans = spans.value if not isinstance(spans, Iterable): - return [], [] + return None non_gen_ai_spans = [] gen_ai_spans = [] @@ -1035,12 +1035,15 @@ def capture_event( if isinstance(profile, Profile): envelope.add_profile(profile.to_json(event_opt, self.options)) - non_gen_ai_spans, gen_ai_spans = _split_gen_ai_spans(event_opt) + split_spans = _split_gen_ai_spans(event_opt) + if split_spans is None or not split_spans[1]: + envelope.add_transaction(event_opt) + else: + non_gen_ai_spans, gen_ai_spans = split_spans - event_opt["spans"] = non_gen_ai_spans - envelope.add_transaction(event_opt) + event_opt["spans"] = non_gen_ai_spans + envelope.add_transaction(event_opt) - if gen_ai_spans: envelope.add_item( Item( type=SpanBatcher.TYPE, From 54a9b073a5887cdc51bd2d23253014e1bcb55c0f Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Wed, 15 Apr 2026 16:08:42 +0200 Subject: [PATCH 06/84] update --- sentry_sdk/client.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sentry_sdk/client.py b/sentry_sdk/client.py index 7bb2acf7dc..9ee225150d 100644 --- a/sentry_sdk/client.py +++ b/sentry_sdk/client.py @@ -93,10 +93,10 @@ def _serialized_v1_span_to_serialized_v2_span( - span: "Dict[str, Any]", event: "Event" + span: "dict[str, Any]", event: "Event" ) -> "dict[str, Any]": # See SpanBatcher._to_transport_format() for analogous population 
of all entries except "attributes". - res: "Dict[str, Any]" = { + res: "dict[str, Any]" = { "status": "ok", "is_segment": False, } From d1aa07cb2c201ab69a130e9b1b3705f2330d629b Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Wed, 15 Apr 2026 16:48:38 +0200 Subject: [PATCH 07/84] . --- sentry_sdk/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sentry_sdk/client.py b/sentry_sdk/client.py index 9ee225150d..e02841d5a3 100644 --- a/sentry_sdk/client.py +++ b/sentry_sdk/client.py @@ -192,7 +192,7 @@ def _split_gen_ai_spans( if "spans" not in event_opt: return None - spans = event_opt["spans"] + spans: "Any" = event_opt["spans"] if isinstance(spans, AnnotatedValue): spans = spans.value From 117a6c9bf47342883a8cd4546582be97d39ad996 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Wed, 15 Apr 2026 18:17:04 +0200 Subject: [PATCH 08/84] . --- sentry_sdk/client.py | 62 ++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 57 insertions(+), 5 deletions(-) diff --git a/sentry_sdk/client.py b/sentry_sdk/client.py index e02841d5a3..7c1eb64cff 100644 --- a/sentry_sdk/client.py +++ b/sentry_sdk/client.py @@ -7,6 +7,7 @@ from importlib import import_module from typing import TYPE_CHECKING, List, Dict, cast, overload import warnings +import json from sentry_sdk._compat import check_uwsgi_thread_support from sentry_sdk._metrics_batcher import MetricsBatcher @@ -27,10 +28,10 @@ get_before_send_metric, has_logs_enabled, has_metrics_enabled, - serialize_attribute, ) from sentry_sdk.serializer import serialize from sentry_sdk.tracing import trace +from sentry_sdk.traces import SpanStatus from sentry_sdk.tracing_utils import has_span_streaming_enabled from sentry_sdk.transport import ( HttpTransportCore, @@ -39,6 +40,7 @@ ) from sentry_sdk.consts import ( SPANDATA, + SPANSTATUS, DEFAULT_MAX_VALUE_LENGTH, DEFAULT_OPTIONS, INSTRUMENTER, @@ -97,7 +99,7 @@ def _serialized_v1_span_to_serialized_v2_span( ) -> "dict[str, Any]": # See 
SpanBatcher._to_transport_format() for analogous population of all entries except "attributes". res: "dict[str, Any]" = { - "status": "ok", + "status": SpanStatus.OK.value, "is_segment": False, } @@ -133,7 +135,7 @@ def _serialized_v1_span_to_serialized_v2_span( if "parent_span_id" in span: res["parent_span_id"] = span["parent_span_id"] - if "status" in span and span["status"] != "ok": + if "status" in span and span["status"] != SPANSTATUS.OK: res["status"] = "error" attributes: "Dict[str, Any]" = {} @@ -180,8 +182,58 @@ def _serialized_v1_span_to_serialized_v2_span( if "version" in sdk_info: attributes["sentry.sdk.version"] = sdk_info["version"] - if attributes: - res["attributes"] = {k: serialize_attribute(v) for k, v in attributes.items()} + for key, value in attributes.items(): + serialized_value = serialize(value) + if isinstance(serialized_value, bool): + res.setdefault("attributes", {})[key] = { + "value": serialized_value, + "type": "boolean", + } + continue + + if isinstance(serialized_value, int): + res.setdefault("attributes", {})[key] = { + "value": serialized_value, + "type": "integer", + } + continue + + if isinstance(serialized_value, float): + res.setdefault("attributes", {})[key] = { + "value": serialized_value, + "type": "double", + } + continue + + if isinstance(serialized_value, str): + res.setdefault("attributes", {})[key] = { + "value": serialized_value, + "type": "string", + } + continue + + if isinstance(serialized_value, list): + if not serialized_value: + res.setdefault("attributes", {})[key] = {"value": [], "type": "array"} + + ty = type(serialized_value[0]) + if ty in (int, str, bool, float) and all( + type(v) is ty for v in serialized_value + ): + res.setdefault("attributes", {})[key] = { + "value": serialized_value, + "type": "array", + } + + continue + + # Types returned when the serializer for V1 span attributes recurses into some container types. 
+ if isinstance(serialized_value, (dict, list)): + res.setdefault("attributes", {})[key] = { + "value": json.dumps(serialized_value), + "type": "string", + } + continue return res From 83c36b54c0c46847531db66f2ddc3d6d592d8a95 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Wed, 15 Apr 2026 18:25:21 +0200 Subject: [PATCH 09/84] . --- sentry_sdk/client.py | 118 ++++++++++++++++++++++++------------------- 1 file changed, 66 insertions(+), 52 deletions(-) diff --git a/sentry_sdk/client.py b/sentry_sdk/client.py index 7c1eb64cff..c6df2f564b 100644 --- a/sentry_sdk/client.py +++ b/sentry_sdk/client.py @@ -71,7 +71,15 @@ from typing import Union from typing import TypeVar - from sentry_sdk._types import Event, Hint, SDKInfo, Log, Metric, EventDataCategory + from sentry_sdk._types import ( + Event, + Hint, + SDKInfo, + Log, + Metric, + EventDataCategory, + SerializedAttributeValue, + ) from sentry_sdk.integrations import Integration from sentry_sdk.scope import Scope from sentry_sdk.session import Session @@ -94,6 +102,56 @@ } +def _serialized_v1_attribute_to_serialized_v2_attribute( + attribute_value: "Any", +) -> "Optional[SerializedAttributeValue]": + if isinstance(attribute_value, bool): + return { + "value": attribute_value, + "type": "boolean", + } + + if isinstance(attribute_value, int): + return { + "value": attribute_value, + "type": "integer", + } + + if isinstance(attribute_value, float): + return { + "value": attribute_value, + "type": "double", + } + + if isinstance(attribute_value, str): + return { + "value": attribute_value, + "type": "string", + } + + if isinstance(attribute_value, list): + if not attribute_value: + return {"value": [], "type": "array"} + + ty = type(attribute_value[0]) + if ty in (int, str, bool, float) and all( + type(v) is ty for v in attribute_value + ): + return { + "value": attribute_value, + "type": "array", + } + + # Types returned when the serializer for V1 span attributes recurses into some container types. 
+ if isinstance(attribute_value, (dict, list)): + return { + "value": json.dumps(attribute_value), + "type": "string", + } + + return None + + def _serialized_v1_span_to_serialized_v2_span( span: "dict[str, Any]", event: "Event" ) -> "dict[str, Any]": @@ -182,58 +240,14 @@ def _serialized_v1_span_to_serialized_v2_span( if "version" in sdk_info: attributes["sentry.sdk.version"] = sdk_info["version"] - for key, value in attributes.items(): - serialized_value = serialize(value) - if isinstance(serialized_value, bool): - res.setdefault("attributes", {})[key] = { - "value": serialized_value, - "type": "boolean", - } - continue - - if isinstance(serialized_value, int): - res.setdefault("attributes", {})[key] = { - "value": serialized_value, - "type": "integer", - } - continue - - if isinstance(serialized_value, float): - res.setdefault("attributes", {})[key] = { - "value": serialized_value, - "type": "double", - } - continue - - if isinstance(serialized_value, str): - res.setdefault("attributes", {})[key] = { - "value": serialized_value, - "type": "string", - } - continue - - if isinstance(serialized_value, list): - if not serialized_value: - res.setdefault("attributes", {})[key] = {"value": [], "type": "array"} - - ty = type(serialized_value[0]) - if ty in (int, str, bool, float) and all( - type(v) is ty for v in serialized_value - ): - res.setdefault("attributes", {})[key] = { - "value": serialized_value, - "type": "array", - } - - continue + if not attributes: + return res - # Types returned when the serializer for V1 span attributes recurses into some container types. 
- if isinstance(serialized_value, (dict, list)): - res.setdefault("attributes", {})[key] = { - "value": json.dumps(serialized_value), - "type": "string", - } - continue + res["attributes"] = {} + for key, value in attributes.items(): + res["attributes"][key] = _serialized_v1_attribute_to_serialized_v2_attribute( + value + ) return res From f71e0ce84e3eacdbd46e0509f4f608c919778542 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Thu, 16 Apr 2026 10:50:59 +0200 Subject: [PATCH 10/84] openai tests --- tests/integrations/openai/test_openai.py | 891 ++++++++++++----------- 1 file changed, 450 insertions(+), 441 deletions(-) diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index ada2e633de..e53f8e4f55 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -132,14 +132,14 @@ async def __call__(self, *args, **kwargs): ], ) def test_nonstreaming_chat_completion_no_prompts( - sentry_init, capture_events, send_default_pii, include_prompts + sentry_init, capture_items, send_default_pii, include_prompts ): sentry_init( integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("span") client = OpenAI(api_key="z") client.chat.completions._post = mock.Mock(return_value=EXAMPLE_CHAT_COMPLETION) @@ -163,27 +163,26 @@ def test_nonstreaming_chat_completion_no_prompts( ) assert response == "the model response" - tx = events[0] - assert tx["type"] == "transaction" - span = tx["spans"][0] - assert span["op"] == "gen_ai.chat" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 - assert span["data"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 - assert 
span["data"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 - assert span["data"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 - assert span["data"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["data"] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 - assert span["data"]["gen_ai.usage.output_tokens"] == 10 - assert span["data"]["gen_ai.usage.input_tokens"] == 20 - assert span["data"]["gen_ai.usage.total_tokens"] == 30 + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["attributes"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] + + assert span["attributes"]["gen_ai.usage.output_tokens"] == 10 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 @pytest.mark.parametrize( @@ -229,13 +228,13 @@ def test_nonstreaming_chat_completion_no_prompts( ), ], ) -def test_nonstreaming_chat_completion(sentry_init, capture_events, messages, request): +def test_nonstreaming_chat_completion(sentry_init, capture_items, messages, request): sentry_init( integrations=[OpenAIIntegration(include_prompts=True)], 
traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("span") client = OpenAI(api_key="z") client.chat.completions._post = mock.Mock(return_value=EXAMPLE_CHAT_COMPLETION) @@ -256,30 +255,29 @@ def test_nonstreaming_chat_completion(sentry_init, capture_events, messages, req ) assert response == "the model response" - tx = events[0] - assert tx["type"] == "transaction" - span = tx["spans"][0] - assert span["op"] == "gen_ai.chat" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False - - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 - assert span["data"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 - assert span["data"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 - assert span["data"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 - assert span["data"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False + + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 param_id = request.node.callspec.id if "blocks" in param_id: - assert json.loads(span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ + assert json.loads(span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ { "type": "text", "content": "You are a helpful assistant.", } ] 
else: - assert json.loads(span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ + assert json.loads(span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ { "type": "text", "content": "You are a helpful assistant.", @@ -290,12 +288,12 @@ def test_nonstreaming_chat_completion(sentry_init, capture_events, messages, req }, ] - assert "hello" in span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert "the model response" in span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + assert "hello" in span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert "the model response" in span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] - assert span["data"]["gen_ai.usage.output_tokens"] == 10 - assert span["data"]["gen_ai.usage.input_tokens"] == 20 - assert span["data"]["gen_ai.usage.total_tokens"] == 30 + assert span["attributes"]["gen_ai.usage.output_tokens"] == 10 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 @pytest.mark.asyncio @@ -308,14 +306,14 @@ def test_nonstreaming_chat_completion(sentry_init, capture_events, messages, req ], ) async def test_nonstreaming_chat_completion_async_no_prompts( - sentry_init, capture_events, send_default_pii, include_prompts + sentry_init, capture_items, send_default_pii, include_prompts ): sentry_init( integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("span") client = AsyncOpenAI(api_key="z") client.chat.completions._post = mock.AsyncMock(return_value=EXAMPLE_CHAT_COMPLETION) @@ -336,27 +334,26 @@ async def test_nonstreaming_chat_completion_async_no_prompts( response = response.choices[0].message.content assert response == "the model response" - tx = events[0] - assert tx["type"] == "transaction" - span = tx["spans"][0] - assert span["op"] == "gen_ai.chat" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert 
span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 - assert span["data"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 - assert span["data"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 - assert span["data"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 - assert span["data"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["data"] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 - assert span["data"]["gen_ai.usage.output_tokens"] == 10 - assert span["data"]["gen_ai.usage.input_tokens"] == 20 - assert span["data"]["gen_ai.usage.total_tokens"] == 30 + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["attributes"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] + + assert span["attributes"]["gen_ai.usage.output_tokens"] == 10 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 @pytest.mark.asyncio @@ -404,14 +401,14 @@ async def 
test_nonstreaming_chat_completion_async_no_prompts( ], ) async def test_nonstreaming_chat_completion_async( - sentry_init, capture_events, messages, request + sentry_init, capture_items, messages, request ): sentry_init( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("span") client = AsyncOpenAI(api_key="z") client.chat.completions._post = AsyncMock(return_value=EXAMPLE_CHAT_COMPLETION) @@ -429,30 +426,29 @@ async def test_nonstreaming_chat_completion_async( response = response.choices[0].message.content assert response == "the model response" - tx = events[0] - assert tx["type"] == "transaction" - span = tx["spans"][0] - assert span["op"] == "gen_ai.chat" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False - - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 - assert span["data"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 - assert span["data"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 - assert span["data"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 - assert span["data"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False + + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert 
span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 param_id = request.node.callspec.id if "blocks" in param_id: - assert json.loads(span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ + assert json.loads(span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ { "type": "text", "content": "You are a helpful assistant.", } ] else: - assert json.loads(span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ + assert json.loads(span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ { "type": "text", "content": "You are a helpful assistant.", @@ -463,12 +459,12 @@ async def test_nonstreaming_chat_completion_async( }, ] - assert "hello" in span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert "the model response" in span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + assert "hello" in span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert "the model response" in span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] - assert span["data"]["gen_ai.usage.output_tokens"] == 10 - assert span["data"]["gen_ai.usage.input_tokens"] == 20 - assert span["data"]["gen_ai.usage.total_tokens"] == 30 + assert span["attributes"]["gen_ai.usage.output_tokens"] == 10 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 def tiktoken_encoding_if_installed(): @@ -491,7 +487,7 @@ def tiktoken_encoding_if_installed(): ) def test_streaming_chat_completion_no_prompts( sentry_init, - capture_events, + capture_items, send_default_pii, include_prompts, get_model_response, @@ -507,7 +503,7 @@ def test_streaming_chat_completion_no_prompts( traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("span") client = OpenAI(api_key="z") returned_stream = get_model_response( @@ -581,32 +577,31 @@ def test_streaming_chat_completion_no_prompts( ) assert response_string == "hello world" - tx = events[0] - assert tx["type"] == "transaction" - span = tx["spans"][0] - 
assert span["op"] == "gen_ai.chat" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 - assert span["data"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 - assert span["data"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 - assert span["data"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 - assert span["data"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 - - assert span["data"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "model-id" - - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["data"] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "model-id" + + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["attributes"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] try: import tiktoken # type: ignore # noqa # pylint: disable=unused-import - assert span["data"]["gen_ai.usage.output_tokens"] == 2 - assert span["data"]["gen_ai.usage.input_tokens"] == 7 - 
assert span["data"]["gen_ai.usage.total_tokens"] == 9 + assert span["attributes"]["gen_ai.usage.output_tokens"] == 2 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 7 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 9 except ImportError: pass # if tiktoken is not installed, we can't guarantee token usage will be calculated properly @@ -617,7 +612,7 @@ def test_streaming_chat_completion_no_prompts( ) def test_streaming_chat_completion_with_usage_in_stream( sentry_init, - capture_events, + capture_items, get_model_response, server_side_event_chunks, ): @@ -627,7 +622,7 @@ def test_streaming_chat_completion_with_usage_in_stream( traces_sample_rate=1.0, send_default_pii=False, ) - events = capture_events() + items = capture_items("span") client = OpenAI(api_key="z") returned_stream = get_model_response( @@ -684,13 +679,11 @@ def test_streaming_chat_completion_with_usage_in_stream( for _ in response_stream: pass - tx = events[0] - assert tx["type"] == "transaction" - span = tx["spans"][0] - assert span["op"] == "gen_ai.chat" - assert span["data"]["gen_ai.usage.input_tokens"] == 20 - assert span["data"]["gen_ai.usage.output_tokens"] == 10 - assert span["data"]["gen_ai.usage.total_tokens"] == 30 + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 + assert span["attributes"]["gen_ai.usage.output_tokens"] == 10 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 @pytest.mark.skipif( @@ -699,7 +692,7 @@ def test_streaming_chat_completion_with_usage_in_stream( ) def test_streaming_chat_completion_empty_content_preserves_token_usage( sentry_init, - capture_events, + capture_items, get_model_response, server_side_event_chunks, ): @@ -709,7 +702,7 @@ def test_streaming_chat_completion_empty_content_preserves_token_usage( traces_sample_rate=1.0, send_default_pii=False, ) - events = capture_events() + items = 
capture_items("span") client = OpenAI(api_key="z") returned_stream = get_model_response( @@ -747,13 +740,11 @@ def test_streaming_chat_completion_empty_content_preserves_token_usage( for _ in response_stream: pass - tx = events[0] - assert tx["type"] == "transaction" - span = tx["spans"][0] - assert span["op"] == "gen_ai.chat" - assert span["data"]["gen_ai.usage.input_tokens"] == 20 - assert "gen_ai.usage.output_tokens" not in span["data"] - assert span["data"]["gen_ai.usage.total_tokens"] == 20 + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 + assert "gen_ai.usage.output_tokens" not in span["attributes"] + assert span["attributes"]["gen_ai.usage.total_tokens"] == 20 @pytest.mark.skipif( @@ -763,7 +754,7 @@ def test_streaming_chat_completion_empty_content_preserves_token_usage( @pytest.mark.asyncio async def test_streaming_chat_completion_empty_content_preserves_token_usage_async( sentry_init, - capture_events, + capture_items, get_model_response, async_iterator, server_side_event_chunks, @@ -774,7 +765,7 @@ async def test_streaming_chat_completion_empty_content_preserves_token_usage_asy traces_sample_rate=1.0, send_default_pii=False, ) - events = capture_events() + items = capture_items("span") client = AsyncOpenAI(api_key="z") returned_stream = get_model_response( @@ -814,13 +805,11 @@ async def test_streaming_chat_completion_empty_content_preserves_token_usage_asy async for _ in response_stream: pass - tx = events[0] - assert tx["type"] == "transaction" - span = tx["spans"][0] - assert span["op"] == "gen_ai.chat" - assert span["data"]["gen_ai.usage.input_tokens"] == 20 - assert "gen_ai.usage.output_tokens" not in span["data"] - assert span["data"]["gen_ai.usage.total_tokens"] == 20 + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert 
span["attributes"]["gen_ai.usage.input_tokens"] == 20 + assert "gen_ai.usage.output_tokens" not in span["attributes"] + assert span["attributes"]["gen_ai.usage.total_tokens"] == 20 @pytest.mark.skipif( @@ -830,7 +819,7 @@ async def test_streaming_chat_completion_empty_content_preserves_token_usage_asy @pytest.mark.asyncio async def test_streaming_chat_completion_async_with_usage_in_stream( sentry_init, - capture_events, + capture_items, get_model_response, async_iterator, server_side_event_chunks, @@ -841,7 +830,7 @@ async def test_streaming_chat_completion_async_with_usage_in_stream( traces_sample_rate=1.0, send_default_pii=False, ) - events = capture_events() + items = capture_items("span") client = AsyncOpenAI(api_key="z") returned_stream = get_model_response( @@ -900,13 +889,11 @@ async def test_streaming_chat_completion_async_with_usage_in_stream( async for _ in response_stream: pass - tx = events[0] - assert tx["type"] == "transaction" - span = tx["spans"][0] - assert span["op"] == "gen_ai.chat" - assert span["data"]["gen_ai.usage.input_tokens"] == 20 - assert span["data"]["gen_ai.usage.output_tokens"] == 10 - assert span["data"]["gen_ai.usage.total_tokens"] == 30 + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 + assert span["attributes"]["gen_ai.usage.output_tokens"] == 10 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 # noinspection PyTypeChecker @@ -955,7 +942,7 @@ async def test_streaming_chat_completion_async_with_usage_in_stream( ) def test_streaming_chat_completion( sentry_init, - capture_events, + capture_items, messages, request, get_model_response, @@ -971,7 +958,7 @@ def test_streaming_chat_completion( traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("span") client = OpenAI(api_key="z") returned_stream = get_model_response( @@ -1041,30 
+1028,29 @@ def test_streaming_chat_completion( map(lambda x: x.choices[0].delta.content, response_stream) ) assert response_string == "hello world" - tx = events[0] - assert tx["type"] == "transaction" - span = tx["spans"][0] - assert span["op"] == "gen_ai.chat" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 - assert span["data"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 - assert span["data"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 - assert span["data"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 - assert span["data"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 param_id = request.node.callspec.id if "blocks" in param_id: - assert json.loads(span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ + assert json.loads(span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ { "type": "text", "content": "You are a helpful assistant.", } ] else: - assert json.loads(span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ + assert json.loads(span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ { "type": "text", "content": "You are a helpful 
assistant.", @@ -1075,22 +1061,22 @@ def test_streaming_chat_completion( }, ] - assert span["data"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "model-id" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "model-id" - assert "hello" in span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert "hello world" in span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + assert "hello" in span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert "hello world" in span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] try: import tiktoken # type: ignore # noqa # pylint: disable=unused-import if "blocks" in param_id: - assert span["data"]["gen_ai.usage.output_tokens"] == 2 - assert span["data"]["gen_ai.usage.input_tokens"] == 7 - assert span["data"]["gen_ai.usage.total_tokens"] == 9 + assert span["attributes"]["gen_ai.usage.output_tokens"] == 2 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 7 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 9 else: - assert span["data"]["gen_ai.usage.output_tokens"] == 2 - assert span["data"]["gen_ai.usage.input_tokens"] == 12 - assert span["data"]["gen_ai.usage.total_tokens"] == 14 + assert span["attributes"]["gen_ai.usage.output_tokens"] == 2 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 12 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 14 except ImportError: pass # if tiktoken is not installed, we can't guarantee token usage will be calculated properly @@ -1107,7 +1093,7 @@ def test_streaming_chat_completion( ) async def test_streaming_chat_completion_async_no_prompts( sentry_init, - capture_events, + capture_items, send_default_pii, include_prompts, get_model_response, @@ -1124,7 +1110,7 @@ async def test_streaming_chat_completion_async_no_prompts( traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("span") client = AsyncOpenAI(api_key="z") returned_stream = get_model_response( @@ -1201,32 +1187,31 @@ async def 
test_streaming_chat_completion_async_no_prompts( response_string += x.choices[0].delta.content assert response_string == "hello world" - tx = events[0] - assert tx["type"] == "transaction" - span = tx["spans"][0] - assert span["op"] == "gen_ai.chat" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 - assert span["data"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 - assert span["data"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 - assert span["data"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 - assert span["data"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 - - assert span["data"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "model-id" - - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["data"] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "model-id" + + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["attributes"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT 
not in span["attributes"] try: import tiktoken # type: ignore # noqa # pylint: disable=unused-import - assert span["data"]["gen_ai.usage.output_tokens"] == 2 - assert span["data"]["gen_ai.usage.input_tokens"] == 7 - assert span["data"]["gen_ai.usage.total_tokens"] == 9 + assert span["attributes"]["gen_ai.usage.output_tokens"] == 2 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 7 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 9 except ImportError: pass # if tiktoken is not installed, we can't guarantee token usage will be calculated properly @@ -1279,7 +1264,7 @@ async def test_streaming_chat_completion_async_no_prompts( ) async def test_streaming_chat_completion_async( sentry_init, - capture_events, + capture_items, messages, request, get_model_response, @@ -1296,7 +1281,7 @@ async def test_streaming_chat_completion_async( traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("span") client = AsyncOpenAI(api_key="z") @@ -1371,32 +1356,31 @@ async def test_streaming_chat_completion_async( response_string += x.choices[0].delta.content assert response_string == "hello world" - tx = events[0] - assert tx["type"] == "transaction" - span = tx["spans"][0] - assert span["op"] == "gen_ai.chat" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 - assert span["data"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 - assert span["data"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 - assert span["data"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 - assert span["data"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 - - assert span["data"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "model-id" + + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == 
"gen_ai.chat" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "model-id" param_id = request.node.callspec.id if "blocks" in param_id: - assert json.loads(span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ + assert json.loads(span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ { "type": "text", "content": "You are a helpful assistant.", } ] else: - assert json.loads(span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ + assert json.loads(span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ { "type": "text", "content": "You are a helpful assistant.", @@ -1407,28 +1391,28 @@ async def test_streaming_chat_completion_async( }, ] - assert "hello" in span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert "hello world" in span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + assert "hello" in span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert "hello world" in span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] try: import tiktoken # type: ignore # noqa # pylint: disable=unused-import if "blocks" in param_id: - assert span["data"]["gen_ai.usage.output_tokens"] == 2 - assert span["data"]["gen_ai.usage.input_tokens"] == 7 - assert span["data"]["gen_ai.usage.total_tokens"] == 9 + assert span["attributes"]["gen_ai.usage.output_tokens"] == 2 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 7 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 9 
else: - assert span["data"]["gen_ai.usage.output_tokens"] == 2 - assert span["data"]["gen_ai.usage.input_tokens"] == 12 - assert span["data"]["gen_ai.usage.total_tokens"] == 14 + assert span["attributes"]["gen_ai.usage.output_tokens"] == 2 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 12 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 14 except ImportError: pass # if tiktoken is not installed, we can't guarantee token usage will be calculated properly -def test_bad_chat_completion(sentry_init, capture_events): +def test_bad_chat_completion(sentry_init, capture_items): sentry_init(integrations=[OpenAIIntegration()], traces_sample_rate=1.0) - events = capture_events() + items = capture_items("event") client = OpenAI(api_key="z") client.chat.completions._post = mock.Mock( @@ -1440,13 +1424,13 @@ def test_bad_chat_completion(sentry_init, capture_events): messages=[{"role": "system", "content": "hello"}], ) - (event,) = events + (event,) = (item.payload for item in items if item.type == "event") assert event["level"] == "error" -def test_span_status_error(sentry_init, capture_events): +def test_span_status_error(sentry_init, capture_items): sentry_init(integrations=[OpenAIIntegration()], traces_sample_rate=1.0) - events = capture_events() + items = capture_items("event", "transaction", "span") with start_transaction(name="test"): client = OpenAI(api_key="z") @@ -1458,17 +1442,20 @@ def test_span_status_error(sentry_init, capture_events): model="some-model", messages=[{"role": "system", "content": "hello"}] ) - (error, transaction) = events - assert error["level"] == "error" - assert transaction["spans"][0]["status"] == "internal_error" - assert transaction["spans"][0]["tags"]["status"] == "internal_error" + (event,) = (item.payload for item in items if item.type == "event") + assert event["level"] == "error" + + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["status"] == "error" + + (transaction,) = 
(item.payload for item in items if item.type == "transaction") assert transaction["contexts"]["trace"]["status"] == "internal_error" @pytest.mark.asyncio -async def test_bad_chat_completion_async(sentry_init, capture_events): +async def test_bad_chat_completion_async(sentry_init, capture_items): sentry_init(integrations=[OpenAIIntegration()], traces_sample_rate=1.0) - events = capture_events() + items = capture_items("event") client = AsyncOpenAI(api_key="z") client.chat.completions._post = AsyncMock( @@ -1479,7 +1466,7 @@ async def test_bad_chat_completion_async(sentry_init, capture_events): model="some-model", messages=[{"role": "system", "content": "hello"}] ) - (event,) = events + (event,) = (item.payload for item in items if item.type == "event") assert event["level"] == "error" @@ -1492,14 +1479,14 @@ async def test_bad_chat_completion_async(sentry_init, capture_events): ], ) def test_embeddings_create_no_pii( - sentry_init, capture_events, send_default_pii, include_prompts + sentry_init, capture_items, send_default_pii, include_prompts ): sentry_init( integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("span") client = OpenAI(api_key="z") @@ -1521,17 +1508,15 @@ def test_embeddings_create_no_pii( assert len(response.data[0].embedding) == 3 - tx = events[0] - assert tx["type"] == "transaction" - span = tx["spans"][0] - assert span["op"] == "gen_ai.embeddings" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "text-embedding-3-large" + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.embeddings" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "text-embedding-3-large" - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in 
span["data"] + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in span["attributes"] - assert span["data"]["gen_ai.usage.input_tokens"] == 20 - assert span["data"]["gen_ai.usage.total_tokens"] == 30 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 @pytest.mark.parametrize( @@ -1577,13 +1562,13 @@ def test_embeddings_create_no_pii( ), ], ) -def test_embeddings_create(sentry_init, capture_events, input, request): +def test_embeddings_create(sentry_init, capture_items, input, request): sentry_init( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("span") client = OpenAI(api_key="z") @@ -1603,24 +1588,24 @@ def test_embeddings_create(sentry_init, capture_events, input, request): assert len(response.data[0].embedding) == 3 - tx = events[0] - assert tx["type"] == "transaction" - span = tx["spans"][0] - assert span["op"] == "gen_ai.embeddings" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "text-embedding-3-large" + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.embeddings" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "text-embedding-3-large" param_id = request.node.callspec.id if param_id == "string": - assert json.loads(span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == ["hello"] + assert json.loads(span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ + "hello" + ] elif param_id == "string_sequence" or param_id == "string_iterable": - assert json.loads(span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ + assert json.loads(span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ "First text", "Second text", "Third text", ] elif param_id == "tokens" or param_id == 
"token_iterable": - assert json.loads(span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ + assert json.loads(span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ 5, 8, 13, @@ -1628,13 +1613,13 @@ def test_embeddings_create(sentry_init, capture_events, input, request): 34, ] else: - assert json.loads(span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ + assert json.loads(span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ [5, 8, 13, 21, 34], [8, 13, 21, 34, 55], ] - assert span["data"]["gen_ai.usage.input_tokens"] == 20 - assert span["data"]["gen_ai.usage.total_tokens"] == 30 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 @pytest.mark.asyncio @@ -1647,14 +1632,14 @@ def test_embeddings_create(sentry_init, capture_events, input, request): ], ) async def test_embeddings_create_async_no_pii( - sentry_init, capture_events, send_default_pii, include_prompts + sentry_init, capture_items, send_default_pii, include_prompts ): sentry_init( integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("span") client = AsyncOpenAI(api_key="z") @@ -1676,17 +1661,15 @@ async def test_embeddings_create_async_no_pii( assert len(response.data[0].embedding) == 3 - tx = events[0] - assert tx["type"] == "transaction" - span = tx["spans"][0] - assert span["op"] == "gen_ai.embeddings" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "text-embedding-3-large" + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.embeddings" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "text-embedding-3-large" - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in span["data"] + assert 
SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in span["attributes"] - assert span["data"]["gen_ai.usage.input_tokens"] == 20 - assert span["data"]["gen_ai.usage.total_tokens"] == 30 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 @pytest.mark.asyncio @@ -1733,13 +1716,13 @@ async def test_embeddings_create_async_no_pii( ), ], ) -async def test_embeddings_create_async(sentry_init, capture_events, input, request): +async def test_embeddings_create_async(sentry_init, capture_items, input, request): sentry_init( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("span") client = AsyncOpenAI(api_key="z") @@ -1761,24 +1744,24 @@ async def test_embeddings_create_async(sentry_init, capture_events, input, reque assert len(response.data[0].embedding) == 3 - tx = events[0] - assert tx["type"] == "transaction" - span = tx["spans"][0] - assert span["op"] == "gen_ai.embeddings" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "text-embedding-3-large" + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.embeddings" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "text-embedding-3-large" param_id = request.node.callspec.id if param_id == "string": - assert json.loads(span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == ["hello"] + assert json.loads(span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ + "hello" + ] elif param_id == "string_sequence" or param_id == "string_iterable": - assert json.loads(span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ + assert json.loads(span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ "First text", "Second text", "Third text", ] elif param_id == 
"tokens" or param_id == "token_iterable": - assert json.loads(span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ + assert json.loads(span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ 5, 8, 13, @@ -1786,13 +1769,13 @@ async def test_embeddings_create_async(sentry_init, capture_events, input, reque 34, ] else: - assert json.loads(span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ + assert json.loads(span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ [5, 8, 13, 21, 34], [8, 13, 21, 34, 55], ] - assert span["data"]["gen_ai.usage.input_tokens"] == 20 - assert span["data"]["gen_ai.usage.total_tokens"] == 30 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 @pytest.mark.parametrize( @@ -1800,14 +1783,14 @@ async def test_embeddings_create_async(sentry_init, capture_events, input, reque [(True, True), (True, False), (False, True), (False, False)], ) def test_embeddings_create_raises_error( - sentry_init, capture_events, send_default_pii, include_prompts + sentry_init, capture_items, send_default_pii, include_prompts ): sentry_init( integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("event") client = OpenAI(api_key="z") @@ -1818,7 +1801,7 @@ def test_embeddings_create_raises_error( with pytest.raises(OpenAIError): client.embeddings.create(input="hello", model="text-embedding-3-large") - (event,) = events + (event,) = (item.payload for item in items if item.type == "event") assert event["level"] == "error" @@ -1828,14 +1811,14 @@ def test_embeddings_create_raises_error( [(True, True), (True, False), (False, True), (False, False)], ) async def test_embeddings_create_raises_error_async( - sentry_init, capture_events, send_default_pii, include_prompts + sentry_init, capture_items, send_default_pii, include_prompts ): sentry_init( 
integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("event") client = AsyncOpenAI(api_key="z") @@ -1846,16 +1829,16 @@ async def test_embeddings_create_raises_error_async( with pytest.raises(OpenAIError): await client.embeddings.create(input="hello", model="text-embedding-3-large") - (event,) = events + (event,) = (item.payload for item in items if item.type == "event") assert event["level"] == "error" -def test_span_origin_nonstreaming_chat(sentry_init, capture_events): +def test_span_origin_nonstreaming_chat(sentry_init, capture_items): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction", "span") client = OpenAI(api_key="z") client.chat.completions._post = mock.Mock(return_value=EXAMPLE_CHAT_COMPLETION) @@ -1865,19 +1848,20 @@ def test_span_origin_nonstreaming_chat(sentry_init, capture_events): model="some-model", messages=[{"role": "system", "content": "hello"}] ) - (event,) = events - + (event,) = (item.payload for item in items if item.type == "transaction") assert event["contexts"]["trace"]["origin"] == "manual" - assert event["spans"][0]["origin"] == "auto.ai.openai" + + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["attributes"]["sentry.origin"] == "auto.ai.openai" @pytest.mark.asyncio -async def test_span_origin_nonstreaming_chat_async(sentry_init, capture_events): +async def test_span_origin_nonstreaming_chat_async(sentry_init, capture_items): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction", "span") client = AsyncOpenAI(api_key="z") client.chat.completions._post = AsyncMock(return_value=EXAMPLE_CHAT_COMPLETION) @@ -1887,18 +1871,19 @@ async def test_span_origin_nonstreaming_chat_async(sentry_init, capture_events): 
model="some-model", messages=[{"role": "system", "content": "hello"}] ) - (event,) = events - + (event,) = (item.payload for item in items if item.type == "transaction") assert event["contexts"]["trace"]["origin"] == "manual" - assert event["spans"][0]["origin"] == "auto.ai.openai" + + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["attributes"]["sentry.origin"] == "auto.ai.openai" -def test_span_origin_streaming_chat(sentry_init, capture_events): +def test_span_origin_streaming_chat(sentry_init, capture_items): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction", "span") client = OpenAI(api_key="z") returned_stream = Stream(cast_to=None, response=None, client=client) @@ -1946,21 +1931,22 @@ def test_span_origin_streaming_chat(sentry_init, capture_events): "".join(map(lambda x: x.choices[0].delta.content, response_stream)) - (event,) = events + (transaction,) = (item.payload for item in items if item.type == "transaction") + assert transaction["contexts"]["trace"]["origin"] == "manual" - assert event["contexts"]["trace"]["origin"] == "manual" - assert event["spans"][0]["origin"] == "auto.ai.openai" + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["attributes"]["sentry.origin"] == "auto.ai.openai" @pytest.mark.asyncio async def test_span_origin_streaming_chat_async( - sentry_init, capture_events, async_iterator + sentry_init, capture_items, async_iterator ): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction", "span") client = AsyncOpenAI(api_key="z") returned_stream = AsyncStream(cast_to=None, response=None, client=client) @@ -2014,18 +2000,19 @@ async def test_span_origin_streaming_chat_async( # "".join(map(lambda x: x.choices[0].delta.content, response_stream)) - (event,) = events - + (event,) = (item.payload for item 
in items if item.type == "transaction") assert event["contexts"]["trace"]["origin"] == "manual" - assert event["spans"][0]["origin"] == "auto.ai.openai" + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["attributes"]["sentry.origin"] == "auto.ai.openai" -def test_span_origin_embeddings(sentry_init, capture_events): + +def test_span_origin_embeddings(sentry_init, capture_items): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction", "span") client = OpenAI(api_key="z") @@ -2043,19 +2030,20 @@ def test_span_origin_embeddings(sentry_init, capture_events): with start_transaction(name="openai tx"): client.embeddings.create(input="hello", model="text-embedding-3-large") - (event,) = events - + (event,) = [item.payload for item in items if item.type == "transaction"] assert event["contexts"]["trace"]["origin"] == "manual" - assert event["spans"][0]["origin"] == "auto.ai.openai" + + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["attributes"]["sentry.origin"] == "auto.ai.openai" @pytest.mark.asyncio -async def test_span_origin_embeddings_async(sentry_init, capture_events): +async def test_span_origin_embeddings_async(sentry_init, capture_items): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction", "span") client = AsyncOpenAI(api_key="z") @@ -2073,10 +2061,11 @@ async def test_span_origin_embeddings_async(sentry_init, capture_events): with start_transaction(name="openai tx"): await client.embeddings.create(input="hello", model="text-embedding-3-large") - (event,) = events - + (event,) = [item.payload for item in items if item.type == "transaction"] assert event["contexts"]["trace"]["origin"] == "manual" - assert event["spans"][0]["origin"] == "auto.ai.openai" + + spans = [item.payload for item in items if item.type == "span"] + 
assert spans[0]["attributes"]["sentry.origin"] == "auto.ai.openai" def test_completions_token_usage_from_response(): @@ -2442,12 +2431,12 @@ def count_tokens(msg): @pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") -def test_ai_client_span_responses_api_no_pii(sentry_init, capture_events): +def test_ai_client_span_responses_api_no_pii(sentry_init, capture_items): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("span") client = OpenAI(api_key="z") client.responses._post = mock.Mock(return_value=EXAMPLE_RESPONSE) @@ -2462,13 +2451,10 @@ def test_ai_client_span_responses_api_no_pii(sentry_init, capture_events): top_p=0.9, ) - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] assert len(spans) == 1 - assert spans[0]["op"] == "gen_ai.responses" - assert spans[0]["origin"] == "auto.ai.openai" - assert spans[0]["data"] == { + assert spans[0]["attributes"] == { "gen_ai.operation.name": "responses", "gen_ai.request.max_tokens": 100, "gen_ai.request.temperature": 0.7, @@ -2482,13 +2468,21 @@ def test_ai_client_span_responses_api_no_pii(sentry_init, capture_events): "gen_ai.usage.output_tokens": 10, "gen_ai.usage.output_tokens.reasoning": 8, "gen_ai.usage.total_tokens": 30, + "sentry.environment": "production", + "sentry.op": "gen_ai.responses", + "sentry.origin": "auto.ai.openai", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": mock.ANY, + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "openai tx", "thread.id": mock.ANY, "thread.name": mock.ANY, } - assert "gen_ai.system_instructions" not in spans[0]["data"] - assert "gen_ai.request.messages" not in spans[0]["data"] - assert "gen_ai.response.text" not in spans[0]["data"] + assert "gen_ai.system_instructions" not in spans[0]["attributes"] + assert "gen_ai.request.messages" not in 
spans[0]["attributes"] + assert "gen_ai.response.text" not in spans[0]["attributes"] @pytest.mark.parametrize( @@ -2557,14 +2551,14 @@ def test_ai_client_span_responses_api_no_pii(sentry_init, capture_events): ) @pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") def test_ai_client_span_responses_api( - sentry_init, capture_events, instructions, input, request + sentry_init, capture_items, instructions, input, request ): sentry_init( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("span") client = OpenAI(api_key="z") client.responses._post = mock.Mock(return_value=EXAMPLE_RESPONSE) @@ -2579,12 +2573,9 @@ def test_ai_client_span_responses_api( top_p=0.9, ) - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] assert len(spans) == 1 - assert spans[0]["op"] == "gen_ai.responses" - assert spans[0]["origin"] == "auto.ai.openai" expected_data = { "gen_ai.operation.name": "responses", @@ -2601,6 +2592,14 @@ def test_ai_client_span_responses_api( "gen_ai.usage.total_tokens": 30, "gen_ai.request.model": "gpt-4o", "gen_ai.response.text": "the model response", + "sentry.environment": "production", + "sentry.op": "gen_ai.responses", + "sentry.origin": "auto.ai.openai", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": mock.ANY, + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "openai tx", "thread.id": mock.ANY, "thread.name": mock.ANY, } @@ -2759,17 +2758,17 @@ def test_ai_client_span_responses_api( } ) - assert spans[0]["data"] == expected_data + assert spans[0]["attributes"] == expected_data @pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") -def test_error_in_responses_api(sentry_init, capture_events): +def test_error_in_responses_api(sentry_init, capture_items): sentry_init( 
integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("event", "transaction", "span") client = OpenAI(api_key="z") client.responses._post = mock.Mock( @@ -2784,15 +2783,17 @@ def test_error_in_responses_api(sentry_init, capture_events): input="How do I check if a Python object is an instance of a class?", ) - (error_event, transaction_event) = events - - assert transaction_event["type"] == "transaction" # make sure the span where the error occurred is captured - assert transaction_event["spans"][0]["op"] == "gen_ai.responses" + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["attributes"]["sentry.op"] == "gen_ai.responses" + (error_event,) = (item.payload for item in items if item.type == "event") assert error_event["level"] == "error" assert error_event["exception"]["values"][0]["type"] == "OpenAIError" + (transaction_event,) = ( + item.payload for item in items if item.type == "transaction" + ) assert ( error_event["contexts"]["trace"]["trace_id"] == transaction_event["contexts"]["trace"]["trace_id"] @@ -2866,14 +2867,14 @@ def test_error_in_responses_api(sentry_init, capture_events): ], ) async def test_ai_client_span_responses_async_api( - sentry_init, capture_events, instructions, input, request + sentry_init, capture_items, instructions, input, request ): sentry_init( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("span") client = AsyncOpenAI(api_key="z") client.responses._post = AsyncMock(return_value=EXAMPLE_RESPONSE) @@ -2888,12 +2889,9 @@ async def test_ai_client_span_responses_async_api( top_p=0.9, ) - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] assert len(spans) == 1 - assert spans[0]["op"] == "gen_ai.responses" - assert 
spans[0]["origin"] == "auto.ai.openai" expected_data = { "gen_ai.operation.name": "responses", @@ -2911,6 +2909,14 @@ async def test_ai_client_span_responses_async_api( "gen_ai.usage.output_tokens.reasoning": 8, "gen_ai.usage.total_tokens": 30, "gen_ai.response.text": "the model response", + "sentry.environment": "production", + "sentry.op": "gen_ai.responses", + "sentry.origin": "auto.ai.openai", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": mock.ANY, + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "openai tx", "thread.id": mock.ANY, "thread.name": mock.ANY, } @@ -3069,7 +3075,7 @@ async def test_ai_client_span_responses_async_api( } ) - assert spans[0]["data"] == expected_data + assert spans[0]["attributes"] == expected_data @pytest.mark.asyncio @@ -3140,7 +3146,7 @@ async def test_ai_client_span_responses_async_api( @pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") async def test_ai_client_span_streaming_responses_async_api( sentry_init, - capture_events, + capture_items, instructions, input, request, @@ -3153,7 +3159,7 @@ async def test_ai_client_span_streaming_responses_async_api( traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("span") client = AsyncOpenAI(api_key="z") returned_stream = get_model_response( @@ -3178,11 +3184,12 @@ async def test_ai_client_span_streaming_responses_async_api( async for _ in result: pass - (transaction,) = events - spans = [span for span in transaction["spans"] if span["op"] == OP.GEN_AI_RESPONSES] + spans = [item.payload for item in items if item.type == "span"] + spans = [ + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_RESPONSES + ] assert len(spans) == 1 - assert spans[0]["origin"] == "auto.ai.openai" expected_data = { "gen_ai.operation.name": "responses", @@ -3200,6 +3207,14 @@ async def test_ai_client_span_streaming_responses_async_api( 
"gen_ai.usage.total_tokens": 30, "gen_ai.request.model": "gpt-4o", "gen_ai.response.text": "hello world", + "sentry.environment": "production", + "sentry.op": "gen_ai.responses", + "sentry.origin": "auto.ai.openai", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": mock.ANY, + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "openai tx", "thread.id": mock.ANY, "thread.name": mock.ANY, } @@ -3358,18 +3373,18 @@ async def test_ai_client_span_streaming_responses_async_api( } ) - assert spans[0]["data"] == expected_data + assert spans[0]["attributes"] == expected_data @pytest.mark.asyncio @pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") -async def test_error_in_responses_async_api(sentry_init, capture_events): +async def test_error_in_responses_async_api(sentry_init, capture_items): sentry_init( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("event", "transaction", "span") client = AsyncOpenAI(api_key="z") client.responses._post = AsyncMock( @@ -3384,15 +3399,17 @@ async def test_error_in_responses_async_api(sentry_init, capture_events): input="How do I check if a Python object is an instance of a class?", ) - (error_event, transaction_event) = events - - assert transaction_event["type"] == "transaction" # make sure the span where the error occurred is captured - assert transaction_event["spans"][0]["op"] == "gen_ai.responses" + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["attributes"]["sentry.op"] == "gen_ai.responses" + (error_event,) = (item.payload for item in items if item.type == "event") assert error_event["level"] == "error" assert error_event["exception"]["values"][0]["type"] == "OpenAIError" + (transaction_event,) = ( + item.payload for item in items if item.type == "transaction" + ) assert ( 
error_event["contexts"]["trace"]["trace_id"] == transaction_event["contexts"]["trace"]["trace_id"] @@ -3479,7 +3496,7 @@ async def test_error_in_responses_async_api(sentry_init, capture_events): @pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") def test_streaming_responses_api( sentry_init, - capture_events, + capture_items, send_default_pii, include_prompts, get_model_response, @@ -3494,7 +3511,7 @@ def test_streaming_responses_api( traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("span") client = OpenAI(api_key="z") returned_stream = get_model_response( @@ -3525,26 +3542,25 @@ def test_streaming_responses_api( assert response_string == "hello world" - (transaction,) = events - (span,) = transaction["spans"] - assert span["op"] == "gen_ai.responses" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 - assert span["data"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 - assert span["data"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + (span,) = (item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.responses" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "response-model-id" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "response-model-id" if send_default_pii and include_prompts: - assert span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '["hello"]' - assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "hello world" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '["hello"]' + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "hello 
world" else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - assert span["data"]["gen_ai.usage.input_tokens"] == 20 - assert span["data"]["gen_ai.usage.output_tokens"] == 10 - assert span["data"]["gen_ai.usage.total_tokens"] == 30 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 + assert span["attributes"]["gen_ai.usage.output_tokens"] == 10 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 @pytest.mark.asyncio @@ -3555,7 +3571,7 @@ def test_streaming_responses_api( @pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") async def test_streaming_responses_api_async( sentry_init, - capture_events, + capture_items, send_default_pii, include_prompts, get_model_response, @@ -3571,7 +3587,7 @@ async def test_streaming_responses_api_async( traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("span") client = AsyncOpenAI(api_key="z") returned_stream = get_model_response( @@ -3600,26 +3616,25 @@ async def test_streaming_responses_api_async( assert response_string == "hello world" - (transaction,) = events - (span,) = transaction["spans"] - assert span["op"] == "gen_ai.responses" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 - assert span["data"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 - assert span["data"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + (span,) = (item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.responses" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + 
assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "response-model-id" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "response-model-id" if send_default_pii and include_prompts: - assert span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '["hello"]' - assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "hello world" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '["hello"]' + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "hello world" else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - assert span["data"]["gen_ai.usage.input_tokens"] == 20 - assert span["data"]["gen_ai.usage.output_tokens"] == 10 - assert span["data"]["gen_ai.usage.total_tokens"] == 30 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 + assert span["attributes"]["gen_ai.usage.output_tokens"] == 10 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 @pytest.mark.skipif( @@ -3630,12 +3645,12 @@ async def test_streaming_responses_api_async( "tools", [[], None, NOT_GIVEN, omit], ) -def test_empty_tools_in_chat_completion(sentry_init, capture_events, tools): +def test_empty_tools_in_chat_completion(sentry_init, capture_items, tools): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("span") client = OpenAI(api_key="z") client.chat.completions._post = mock.Mock(return_value=EXAMPLE_CHAT_COMPLETION) @@ -3647,10 +3662,9 @@ def test_empty_tools_in_chat_completion(sentry_init, capture_events, tools): tools=tools, ) - (event,) = events - span = event["spans"][0] + span = next(item.payload for item in items if item.type == "span") - assert "gen_ai.request.available_tools" not in 
span["data"] + assert "gen_ai.request.available_tools" not in span["attributes"] # Test messages with mixed roles including "ai" that should be mapped to "assistant" @@ -3669,7 +3683,7 @@ def test_empty_tools_in_chat_completion(sentry_init, capture_events, tools): ], ) def test_openai_message_role_mapping( - sentry_init, capture_events, test_message, expected_role + sentry_init, capture_items, test_message, expected_role ): """Test that OpenAI integration properly maps message roles like 'ai' to 'assistant'""" @@ -3678,7 +3692,7 @@ def test_openai_message_role_mapping( traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("span") client = OpenAI(api_key="z") client.chat.completions._post = mock.Mock(return_value=EXAMPLE_CHAT_COMPLETION) @@ -3688,28 +3702,27 @@ def test_openai_message_role_mapping( with start_transaction(name="openai tx"): client.chat.completions.create(model="test-model", messages=test_messages) # Verify that the span was created correctly - (event,) = events - span = event["spans"][0] - assert span["op"] == "gen_ai.chat" - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] # Parse the stored messages import json - stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + stored_messages = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(stored_messages) == 1 assert stored_messages[0]["role"] == expected_role -def test_openai_message_truncation(sentry_init, capture_events): +def test_openai_message_truncation(sentry_init, capture_items): """Test that large messages are truncated properly in OpenAI integration.""" sentry_init( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + 
items = capture_items("transaction", "span") client = OpenAI(api_key="z") client.chat.completions._post = mock.Mock(return_value=EXAMPLE_CHAT_COMPLETION) @@ -3730,17 +3743,17 @@ def test_openai_message_truncation(sentry_init, capture_events): messages=large_messages, ) - (event,) = events - span = event["spans"][0] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] + span = next(item.payload for item in items if item.type == "span") + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] - messages_data = span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + messages_data = span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] assert isinstance(messages_data, str) parsed_messages = json.loads(messages_data) assert isinstance(parsed_messages, list) assert len(parsed_messages) <= len(large_messages) + (event,) = (item.payload for item in items if item.type == "transaction") meta_path = event["_meta"] span_meta = meta_path["spans"]["0"]["data"] messages_meta = span_meta[SPANDATA.GEN_AI_REQUEST_MESSAGES] @@ -3749,7 +3762,7 @@ def test_openai_message_truncation(sentry_init, capture_events): # noinspection PyTypeChecker def test_streaming_chat_completion_ttft( - sentry_init, capture_events, get_model_response, server_side_event_chunks + sentry_init, capture_items, get_model_response, server_side_event_chunks ): """ Test that streaming chat completions capture time-to-first-token (TTFT). 
@@ -3758,7 +3771,7 @@ def test_streaming_chat_completion_ttft( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("span") client = OpenAI(api_key="z") returned_stream = get_model_response( @@ -3810,13 +3823,12 @@ def test_streaming_chat_completion_ttft( for _ in response_stream: pass - (tx,) = events - span = tx["spans"][0] - assert span["op"] == "gen_ai.chat" + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.chat" # Verify TTFT is captured - assert SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN in span["data"] - ttft = span["data"][SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN] + assert SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN in span["attributes"] + ttft = span["attributes"][SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN] assert isinstance(ttft, float) assert ttft > 0 @@ -3825,7 +3837,7 @@ def test_streaming_chat_completion_ttft( @pytest.mark.asyncio async def test_streaming_chat_completion_ttft_async( sentry_init, - capture_events, + capture_items, get_model_response, async_iterator, server_side_event_chunks, @@ -3837,7 +3849,7 @@ async def test_streaming_chat_completion_ttft_async( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("span") client = AsyncOpenAI(api_key="z") returned_stream = get_model_response( @@ -3891,13 +3903,12 @@ async def test_streaming_chat_completion_ttft_async( async for _ in response_stream: pass - (tx,) = events - span = tx["spans"][0] - assert span["op"] == "gen_ai.chat" + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.chat" # Verify TTFT is captured - assert SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN in span["data"] - ttft = span["data"][SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN] + assert SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN in span["attributes"] + ttft = 
span["attributes"][SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN] assert isinstance(ttft, float) assert ttft > 0 @@ -3905,7 +3916,7 @@ async def test_streaming_chat_completion_ttft_async( # noinspection PyTypeChecker @pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") def test_streaming_responses_api_ttft( - sentry_init, capture_events, get_model_response, server_side_event_chunks + sentry_init, capture_items, get_model_response, server_side_event_chunks ): """ Test that streaming responses API captures time-to-first-token (TTFT). @@ -3914,7 +3925,7 @@ def test_streaming_responses_api_ttft( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("span") client = OpenAI(api_key="z") returned_stream = get_model_response( @@ -3936,13 +3947,12 @@ def test_streaming_responses_api_ttft( for _ in response_stream: pass - (tx,) = events - span = tx["spans"][0] - assert span["op"] == "gen_ai.responses" + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.responses" # Verify TTFT is captured - assert SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN in span["data"] - ttft = span["data"][SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN] + assert SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN in span["attributes"] + ttft = span["attributes"][SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN] assert isinstance(ttft, float) assert ttft > 0 @@ -3952,7 +3962,7 @@ def test_streaming_responses_api_ttft( @pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") async def test_streaming_responses_api_ttft_async( sentry_init, - capture_events, + capture_items, get_model_response, async_iterator, server_side_event_chunks, @@ -3964,7 +3974,7 @@ async def test_streaming_responses_api_ttft_async( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("span") client = 
AsyncOpenAI(api_key="z") returned_stream = get_model_response( @@ -3986,12 +3996,11 @@ async def test_streaming_responses_api_ttft_async( async for _ in response_stream: pass - (tx,) = events - span = tx["spans"][0] - assert span["op"] == "gen_ai.responses" + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.responses" # Verify TTFT is captured - assert SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN in span["data"] - ttft = span["data"][SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN] + assert SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN in span["attributes"] + ttft = span["attributes"][SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN] assert isinstance(ttft, float) assert ttft > 0 From 1fab6321ef8a6eb80ecc8fc44c2c733c959a62b4 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Thu, 16 Apr 2026 11:43:47 +0200 Subject: [PATCH 11/84] anthropic tests --- .../integrations/anthropic/test_anthropic.py | 1478 +++++++++-------- 1 file changed, 747 insertions(+), 731 deletions(-) diff --git a/tests/integrations/anthropic/test_anthropic.py b/tests/integrations/anthropic/test_anthropic.py index e86f7e1fa9..c7fc280b6c 100644 --- a/tests/integrations/anthropic/test_anthropic.py +++ b/tests/integrations/anthropic/test_anthropic.py @@ -91,14 +91,14 @@ async def __call__(self, *args, **kwargs): ], ) def test_nonstreaming_create_message( - sentry_init, capture_events, send_default_pii, include_prompts + sentry_init, capture_items, send_default_pii, include_prompts ): sentry_init( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("transaction", "span") client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -120,37 +120,38 @@ def test_nonstreaming_create_message( assert usage.input_tokens == 10 assert usage.output_tokens == 20 - assert 
len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - assert len(event["spans"]) == 1 - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (span,) = spans - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" if send_default_pii and include_prompts: assert ( - span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '[{"role": "user", "content": "Hello, Claude"}]' ) - assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi, I'm Claude." + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi, I'm Claude." 
else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False - assert span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" - assert span["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["end_turn"] + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] + == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["end_turn"] @pytest.mark.asyncio @@ -164,14 +165,14 @@ def test_nonstreaming_create_message( ], ) async def test_nonstreaming_create_message_async( - sentry_init, capture_events, send_default_pii, include_prompts + sentry_init, capture_items, send_default_pii, include_prompts ): sentry_init( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("transaction", "span") client = AsyncAnthropic(api_key="z") client.messages._post = AsyncMock(return_value=EXAMPLE_MESSAGE) @@ -193,36 +194,37 @@ async def test_nonstreaming_create_message_async( assert usage.input_tokens == 10 assert usage.output_tokens == 20 - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for 
item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - assert len(event["spans"]) == 1 - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (span,) = spans - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" if send_default_pii and include_prompts: assert ( - span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '[{"role": "user", "content": "Hello, Claude"}]' ) - assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi, I'm Claude." + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi, I'm Claude." 
else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False - assert span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] + == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) @pytest.mark.parametrize( @@ -236,7 +238,7 @@ async def test_nonstreaming_create_message_async( ) def test_streaming_create_message( sentry_init, - capture_events, + capture_items, send_default_pii, include_prompts, get_model_response, @@ -286,7 +288,7 @@ def test_streaming_create_message( traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -308,42 +310,45 @@ def test_streaming_create_message( for _ in message: pass - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - span = next(span for span in event["spans"] if span["op"] == OP.GEN_AI_CHAT) + spans = [item.payload for item in items if item.type == "span"] + span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) - assert span["op"] == OP.GEN_AI_CHAT 
- assert span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" if send_default_pii and include_prompts: assert ( - span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '[{"role": "user", "content": "Hello, Claude"}]' ) - assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - assert span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" - assert span["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["max_tokens"] + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] + == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) + assert 
span["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["max_tokens"] def test_streaming_create_message_close( sentry_init, - capture_events, + capture_items, get_model_response, server_side_event_chunks, ): @@ -391,7 +396,7 @@ def test_streaming_create_message_close( traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -415,31 +420,34 @@ def test_streaming_create_message_close( messages.close() - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - span = next(span for span in event["spans"] if span["op"] == OP.GEN_AI_CHAT) + spans = [item.payload for item in items if item.type == "span"] + span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" assert ( - span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '[{"role": "user", "content": "Hello, Claude"}]' ) - assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" 
- assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - assert span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] + == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) @pytest.mark.skipif( @@ -448,7 +456,7 @@ def test_streaming_create_message_close( ) def test_streaming_create_message_api_error( sentry_init, - capture_events, + capture_items, get_model_response, server_side_event_chunks, ): @@ -491,7 +499,7 @@ def test_streaming_create_message_api_error( traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -513,34 +521,36 @@ def test_streaming_create_message_api_error( for _ in message: pass - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - span = next(span for span in event["spans"] if span["op"] == OP.GEN_AI_CHAT) + spans = [item.payload for item in items if item.type == "span"] + span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert 
span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" assert ( - span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '[{"role": "user", "content": "Hello, Claude"}]' ) - assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - assert span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] + == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) - assert span["status"] == "internal_error" - assert span["tags"]["status"] == "internal_error" + assert span["status"] == "error" assert event["contexts"]["trace"]["status"] == "internal_error" @@ -555,7 +565,7 @@ def test_streaming_create_message_api_error( ) def test_stream_messages( sentry_init, - capture_events, + capture_items, send_default_pii, include_prompts, get_model_response, @@ -605,7 +615,7 @@ def test_stream_messages( traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -628,42 +638,45 @@ def test_stream_messages( for event in stream: pass - assert 
len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - span = next(span for span in event["spans"] if span["op"] == OP.GEN_AI_CHAT) + spans = [item.payload for item in items if item.type == "span"] + span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" if send_default_pii and include_prompts: assert ( - span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '[{"role": "user", "content": "Hello, Claude"}]' ) - assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" 
else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - assert span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" - assert span["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["max_tokens"] + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] + == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["max_tokens"] def test_stream_messages_close( sentry_init, - capture_events, + capture_items, get_model_response, server_side_event_chunks, ): @@ -711,7 +724,7 @@ def test_stream_messages_close( traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -740,31 +753,34 @@ def test_stream_messages_close( stream.close() - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - span = next(span for span in event["spans"] if span["op"] == OP.GEN_AI_CHAT) + spans = [item.payload for item in items if item.type == "span"] + span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) - 
assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" assert ( - span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '[{"role": "user", "content": "Hello, Claude"}]' ) - assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - assert span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] + == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) @pytest.mark.skipif( @@ -773,7 +789,7 @@ def test_stream_messages_close( ) def test_stream_messages_api_error( sentry_init, - capture_events, + capture_items, get_model_response, server_side_event_chunks, ): @@ -816,7 +832,7 @@ def test_stream_messages_api_error( traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ 
{ @@ -839,34 +855,36 @@ def test_stream_messages_api_error( for event in stream: pass - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - span = next(span for span in event["spans"] if span["op"] == OP.GEN_AI_CHAT) + spans = [item.payload for item in items if item.type == "span"] + span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" assert ( - span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '[{"role": "user", "content": "Hello, Claude"}]' ) - assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" 
- assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - assert span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] + == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) - assert span["status"] == "internal_error" - assert span["tags"]["status"] == "internal_error" + assert span["status"] == "error" assert event["contexts"]["trace"]["status"] == "internal_error" @@ -882,7 +900,7 @@ def test_stream_messages_api_error( ) async def test_streaming_create_message_async( sentry_init, - capture_events, + capture_items, send_default_pii, include_prompts, get_model_response, @@ -936,7 +954,7 @@ async def test_streaming_create_message_async( default_integrations=False, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -958,44 +976,45 @@ async def test_streaming_create_message_async( async for _ in message: pass - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - assert len(event["spans"]) == 1 - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (span,) = spans - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert 
span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" if send_default_pii and include_prompts: assert ( - span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '[{"role": "user", "content": "Hello, Claude"}]' ) - assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - assert span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" - assert span["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["max_tokens"] + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] + == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["max_tokens"] @pytest.mark.asyncio async def 
test_streaming_create_message_async_close( sentry_init, - capture_events, + capture_items, get_model_response, async_iterator, server_side_event_chunks, @@ -1046,7 +1065,7 @@ async def test_streaming_create_message_async_close( traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -1069,31 +1088,34 @@ async def test_streaming_create_message_async_close( await messages.__anext__() await messages.close() - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - span = next(span for span in event["spans"] if span["op"] == OP.GEN_AI_CHAT) + spans = [item.payload for item in items if item.type == "span"] + span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" assert ( - span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '[{"role": "user", "content": "Hello, Claude"}]' ) - assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" 
- assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - assert span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] + == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) @pytest.mark.skipif( @@ -1103,7 +1125,7 @@ async def test_streaming_create_message_async_close( @pytest.mark.asyncio async def test_streaming_create_message_async_api_error( sentry_init, - capture_events, + capture_items, get_model_response, async_iterator, server_side_event_chunks, @@ -1149,7 +1171,7 @@ async def test_streaming_create_message_async_api_error( traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -1171,34 +1193,36 @@ async def test_streaming_create_message_async_api_error( async for _ in message: pass - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - span = next(span for span in event["spans"] if span["op"] == OP.GEN_AI_CHAT) + spans = [item.payload for item in items if item.type == "span"] + span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - 
assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" assert ( - span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '[{"role": "user", "content": "Hello, Claude"}]' ) - assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - assert span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] + == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) - assert span["status"] == "internal_error" - assert span["tags"]["status"] == "internal_error" + assert span["status"] == "error" assert event["contexts"]["trace"]["status"] == "internal_error" @@ -1214,7 +1238,7 @@ async def test_streaming_create_message_async_api_error( ) async def test_stream_message_async( sentry_init, - capture_events, + capture_items, send_default_pii, include_prompts, get_model_response, @@ -1267,7 +1291,7 @@ async def test_stream_message_async( traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("transaction", 
"span") messages = [ { @@ -1290,37 +1314,38 @@ async def test_stream_message_async( async for event in stream: pass - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - assert len(event["spans"]) == 1 - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (span,) = spans - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" if send_default_pii and include_prompts: assert ( - span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '[{"role": "user", "content": "Hello, Claude"}]' ) - assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" 
else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - assert span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] + == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) @pytest.mark.skipif( @@ -1330,7 +1355,7 @@ async def test_stream_message_async( @pytest.mark.asyncio async def test_stream_messages_async_api_error( sentry_init, - capture_events, + capture_items, get_model_response, async_iterator, server_side_event_chunks, @@ -1376,7 +1401,7 @@ async def test_stream_messages_async_api_error( traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -1399,41 +1424,43 @@ async def test_stream_messages_async_api_error( async for event in stream: pass - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - span = next(span for span in event["spans"] if span["op"] == OP.GEN_AI_CHAT) + spans = [item.payload for item in items if item.type == "span"] + span = next( + span for span in spans if span["attributes"]["sentry.op"] == 
OP.GEN_AI_CHAT + ) - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" assert ( - span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '[{"role": "user", "content": "Hello, Claude"}]' ) - assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - assert span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] + == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) - assert span["status"] == "internal_error" - assert span["tags"]["status"] == "internal_error" + assert span["status"] == "error" assert event["contexts"]["trace"]["status"] == "internal_error" @pytest.mark.asyncio async def test_stream_messages_async_close( sentry_init, - capture_events, + capture_items, get_model_response, async_iterator, server_side_event_chunks, @@ 
-1484,7 +1511,7 @@ async def test_stream_messages_async_close( traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -1515,31 +1542,34 @@ async def test_stream_messages_async_close( await stream.close() - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - span = next(span for span in event["spans"] if span["op"] == OP.GEN_AI_CHAT) + spans = [item.payload for item in items if item.type == "span"] + span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" assert ( - span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '[{"role": "user", "content": "Hello, Claude"}]' ) - assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" 
- assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - assert span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] + == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) @pytest.mark.skipif( @@ -1557,7 +1587,7 @@ async def test_stream_messages_async_close( ) def test_streaming_create_message_with_input_json_delta( sentry_init, - capture_events, + capture_items, send_default_pii, include_prompts, get_model_response, @@ -1637,7 +1667,7 @@ def test_streaming_create_message_with_input_json_delta( traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -1659,38 +1689,36 @@ def test_streaming_create_message_with_input_json_delta( for _ in message: pass - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - assert len(event["spans"]) == 1 - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (span,) = spans - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert 
span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" if send_default_pii and include_prompts: assert ( - span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '[{"role": "user", "content": "What is the weather like in San Francisco?"}]' ) assert ( - span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == '{"location": "San Francisco, CA"}' ) else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 366 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 41 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 407 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 366 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 41 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 407 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True @pytest.mark.skipif( @@ -1708,7 +1736,7 @@ def test_streaming_create_message_with_input_json_delta( ) def test_stream_messages_with_input_json_delta( sentry_init, - capture_events, + capture_items, send_default_pii, include_prompts, get_model_response, @@ -1788,7 +1816,7 @@ def test_stream_messages_with_input_json_delta( traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -1811,38 +1839,36 @@ def 
test_stream_messages_with_input_json_delta( for event in stream: pass - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - assert len(event["spans"]) == 1 - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (span,) = spans - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" if send_default_pii and include_prompts: assert ( - span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '[{"role": "user", "content": "What is the weather like in San Francisco?"}]' ) assert ( - span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == '{"location": "San Francisco, CA"}' ) else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 366 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 41 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 407 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 366 
+ assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 41 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 407 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True @pytest.mark.asyncio @@ -1861,7 +1887,7 @@ def test_stream_messages_with_input_json_delta( ) async def test_streaming_create_message_with_input_json_delta_async( sentry_init, - capture_events, + capture_items, send_default_pii, include_prompts, get_model_response, @@ -1947,7 +1973,7 @@ async def test_streaming_create_message_with_input_json_delta_async( traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -1969,39 +1995,37 @@ async def test_streaming_create_message_with_input_json_delta_async( async for _ in message: pass - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - assert len(event["spans"]) == 1 - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (span,) = spans - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" if send_default_pii and include_prompts: assert ( - span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '[{"role": "user", "content": "What is the 
weather like in San Francisco?"}]' ) assert ( - span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == '{"location": "San Francisco, CA"}' ) else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 366 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 41 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 407 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 366 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 41 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 407 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True @pytest.mark.asyncio @@ -2020,7 +2044,7 @@ async def test_streaming_create_message_with_input_json_delta_async( ) async def test_stream_message_with_input_json_delta_async( sentry_init, - capture_events, + capture_items, send_default_pii, include_prompts, get_model_response, @@ -2106,7 +2130,7 @@ async def test_stream_message_with_input_json_delta_async( traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -2129,44 +2153,42 @@ async def test_stream_message_with_input_json_delta_async( async for event in stream: pass - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - assert len(event["spans"]) == 1 - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (span,) = spans - assert 
span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" if send_default_pii and include_prompts: assert ( - span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '[{"role": "user", "content": "What is the weather like in San Francisco?"}]' ) assert ( - span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == '{"location": "San Francisco, CA"}' ) else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 366 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 41 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 407 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 366 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 41 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 407 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True -def test_exception_message_create(sentry_init, capture_events): +def test_exception_message_create(sentry_init, capture_items): sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) - events = capture_events() + items = 
capture_items("event", "transaction") client = Anthropic(api_key="z") client.messages._post = mock.Mock( @@ -2179,14 +2201,16 @@ def test_exception_message_create(sentry_init, capture_events): max_tokens=1024, ) - (event, transaction) = events + (event,) = (item.payload for item in items if item.type == "event") assert event["level"] == "error" + + (transaction,) = (item.payload for item in items if item.type == "transaction") assert transaction["contexts"]["trace"]["status"] == "internal_error" -def test_span_status_error(sentry_init, capture_events): +def test_span_status_error(sentry_init, capture_items): sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) - events = capture_events() + items = capture_items("event", "span") with start_transaction(name="anthropic"): client = Anthropic(api_key="z") @@ -2200,18 +2224,19 @@ def test_span_status_error(sentry_init, capture_events): max_tokens=1024, ) - (error, transaction) = events + (error,) = (item.payload for item in items if item.type == "event") assert error["level"] == "error" - assert transaction["spans"][0]["status"] == "internal_error" - assert transaction["spans"][0]["tags"]["status"] == "internal_error" - assert transaction["spans"][0]["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert transaction["spans"][0]["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["status"] == "error" + assert spans[0]["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert spans[0]["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" @pytest.mark.asyncio -async def test_span_status_error_async(sentry_init, capture_events): +async def test_span_status_error_async(sentry_init, capture_items): sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) - events = capture_events() + items = capture_items("event", "span") with start_transaction(name="anthropic"): client = 
AsyncAnthropic(api_key="z") @@ -2225,18 +2250,19 @@ async def test_span_status_error_async(sentry_init, capture_events): max_tokens=1024, ) - (error, transaction) = events + (error,) = (item.payload for item in items if item.type == "event") assert error["level"] == "error" - assert transaction["spans"][0]["status"] == "internal_error" - assert transaction["spans"][0]["tags"]["status"] == "internal_error" - assert transaction["spans"][0]["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert transaction["spans"][0]["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["status"] == "error" + assert spans[0]["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert spans[0]["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" @pytest.mark.asyncio -async def test_exception_message_create_async(sentry_init, capture_events): +async def test_exception_message_create_async(sentry_init, capture_items): sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) - events = capture_events() + items = capture_items("event", "transaction") client = AsyncAnthropic(api_key="z") client.messages._post = AsyncMock( @@ -2249,17 +2275,19 @@ async def test_exception_message_create_async(sentry_init, capture_events): max_tokens=1024, ) - (event, transaction) = events + (event,) = (item.payload for item in items if item.type == "event") assert event["level"] == "error" + + (transaction,) = (item.payload for item in items if item.type == "transaction") assert transaction["contexts"]["trace"]["status"] == "internal_error" -def test_span_origin(sentry_init, capture_events): +def test_span_origin(sentry_init, capture_items): sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction", "span") client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -2274,21 
+2302,22 @@ def test_span_origin(sentry_init, capture_events): with start_transaction(name="anthropic"): client.messages.create(max_tokens=1024, messages=messages, model="model") - (event,) = events - + (event,) = (item.payload for item in items if item.type == "transaction") assert event["contexts"]["trace"]["origin"] == "manual" - assert event["spans"][0]["origin"] == "auto.ai.anthropic" - assert event["spans"][0]["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert event["spans"][0]["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["attributes"]["sentry.origin"] == "auto.ai.anthropic" + assert spans[0]["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert spans[0]["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" @pytest.mark.asyncio -async def test_span_origin_async(sentry_init, capture_events): +async def test_span_origin_async(sentry_init, capture_items): sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction", "span") client = AsyncAnthropic(api_key="z") client.messages._post = AsyncMock(return_value=EXAMPLE_MESSAGE) @@ -2303,12 +2332,13 @@ async def test_span_origin_async(sentry_init, capture_events): with start_transaction(name="anthropic"): await client.messages.create(max_tokens=1024, messages=messages, model="model") - (event,) = events - + (event,) = (item.payload for item in items if item.type == "transaction") assert event["contexts"]["trace"]["origin"] == "manual" - assert event["spans"][0]["origin"] == "auto.ai.anthropic" - assert event["spans"][0]["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert event["spans"][0]["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["attributes"]["sentry.origin"] == "auto.ai.anthropic" + assert 
spans[0]["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert spans[0]["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" @pytest.mark.skipif( @@ -2392,7 +2422,7 @@ def test_set_output_data_with_input_json_delta(sentry_init): ], ) def test_anthropic_message_role_mapping( - sentry_init, capture_events, test_message, expected_role + sentry_init, capture_items, test_message, expected_role ): """Test that Anthropic integration properly maps message roles like 'ai' to 'assistant'""" sentry_init( @@ -2400,7 +2430,7 @@ def test_anthropic_message_role_mapping( traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -2425,29 +2455,28 @@ def mock_messages_create(*args, **kwargs): model="claude-3-opus", max_tokens=10, messages=test_messages ) - (event,) = events - span = event["spans"][0] + span = next(item.payload for item in items if item.type == "span") # Verify that the span was created correctly - assert span["op"] == "gen_ai.chat" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] # Parse the stored messages - stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + stored_messages = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert stored_messages[0]["role"] == expected_role -def test_anthropic_message_truncation(sentry_init, capture_events): +def test_anthropic_message_truncation(sentry_init, capture_items): """Test that large messages are truncated properly in Anthropic integration.""" sentry_init( 
integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -2466,21 +2495,18 @@ def test_anthropic_message_truncation(sentry_init, capture_events): with start_transaction(): client.messages.create(max_tokens=1024, messages=messages, model="model") - assert len(events) > 0 - tx = events[0] - assert tx["type"] == "transaction" - + spans = [item.payload for item in items if item.type == "span"] chat_spans = [ - span for span in tx.get("spans", []) if span.get("op") == OP.GEN_AI_CHAT + span for span in spans if span["attributes"].get("sentry.op") == OP.GEN_AI_CHAT ] assert len(chat_spans) > 0 chat_span = chat_spans[0] - assert chat_span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert chat_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["data"] + assert chat_span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert chat_span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["attributes"] - messages_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + messages_data = chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] assert isinstance(messages_data, str) parsed_messages = json.loads(messages_data) @@ -2488,18 +2514,19 @@ def test_anthropic_message_truncation(sentry_init, capture_events): assert len(parsed_messages) == 1 assert "small message 5" in str(parsed_messages[0]) + tx = next(item.payload for item in items if item.type == "transaction") assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5 @pytest.mark.asyncio -async def test_anthropic_message_truncation_async(sentry_init, capture_events): +async def test_anthropic_message_truncation_async(sentry_init, 
capture_items): """Test that large messages are truncated properly in Anthropic integration.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") client = AsyncAnthropic(api_key="z") client.messages._post = mock.AsyncMock(return_value=EXAMPLE_MESSAGE) @@ -2518,21 +2545,18 @@ async def test_anthropic_message_truncation_async(sentry_init, capture_events): with start_transaction(): await client.messages.create(max_tokens=1024, messages=messages, model="model") - assert len(events) > 0 - tx = events[0] - assert tx["type"] == "transaction" - + spans = [item.payload for item in items if item.type == "span"] chat_spans = [ - span for span in tx.get("spans", []) if span.get("op") == OP.GEN_AI_CHAT + span for span in spans if span["attributes"].get("sentry.op") == OP.GEN_AI_CHAT ] assert len(chat_spans) > 0 chat_span = chat_spans[0] - assert chat_span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert chat_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["data"] + assert chat_span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert chat_span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["attributes"] - messages_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + messages_data = chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] assert isinstance(messages_data, str) parsed_messages = json.loads(messages_data) @@ -2540,6 +2564,7 @@ async def test_anthropic_message_truncation_async(sentry_init, capture_events): assert len(parsed_messages) == 1 assert "small message 5" in str(parsed_messages[0]) + tx = next(item.payload for item in items if item.type == "transaction") assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5 @@ -2553,7 +2578,7 @@ async 
def test_anthropic_message_truncation_async(sentry_init, capture_events): ], ) def test_nonstreaming_create_message_with_system_prompt( - sentry_init, capture_events, send_default_pii, include_prompts + sentry_init, capture_items, send_default_pii, include_prompts ): """Test that system prompts are properly captured in GEN_AI_REQUEST_MESSAGES.""" sentry_init( @@ -2561,7 +2586,7 @@ def test_nonstreaming_create_message_with_system_prompt( traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("transaction", "span") client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -2586,46 +2611,46 @@ def test_nonstreaming_create_message_with_system_prompt( assert usage.input_tokens == 10 assert usage.output_tokens == 20 - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - assert len(event["spans"]) == 1 - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (span,) = spans - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" if send_default_pii and include_prompts: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["data"] + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["attributes"] system_instructions = json.loads( - 
span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] ) assert system_instructions == [ {"type": "text", "content": "You are a helpful assistant."} ] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] - stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] + stored_messages = json.loads( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) assert len(stored_messages) == 1 assert stored_messages[0]["role"] == "user" assert stored_messages[0]["content"] == "Hello, Claude" - assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi, I'm Claude." + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi, I'm Claude." else: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["data"] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["attributes"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False - assert span["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["end_turn"] + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["end_turn"] @pytest.mark.asyncio @@ -2639,7 +2664,7 @@ def test_nonstreaming_create_message_with_system_prompt( ], ) async 
def test_nonstreaming_create_message_with_system_prompt_async( - sentry_init, capture_events, send_default_pii, include_prompts + sentry_init, capture_items, send_default_pii, include_prompts ): """Test that system prompts are properly captured in GEN_AI_REQUEST_MESSAGES (async).""" sentry_init( @@ -2647,7 +2672,7 @@ async def test_nonstreaming_create_message_with_system_prompt_async( traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("transaction", "span") client = AsyncAnthropic(api_key="z") client.messages._post = AsyncMock(return_value=EXAMPLE_MESSAGE) @@ -2672,46 +2697,46 @@ async def test_nonstreaming_create_message_with_system_prompt_async( assert usage.input_tokens == 10 assert usage.output_tokens == 20 - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - assert len(event["spans"]) == 1 - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (span,) = spans - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" if send_default_pii and include_prompts: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["data"] + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["attributes"] system_instructions = json.loads( - span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + 
span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] ) assert system_instructions == [ {"type": "text", "content": "You are a helpful assistant."} ] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] - stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] + stored_messages = json.loads( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) assert len(stored_messages) == 1 assert stored_messages[0]["role"] == "user" assert stored_messages[0]["content"] == "Hello, Claude" - assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi, I'm Claude." + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi, I'm Claude." else: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["data"] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["attributes"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False - assert span["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["end_turn"] + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["end_turn"] @pytest.mark.parametrize( @@ -2725,7 +2750,7 @@ async def test_nonstreaming_create_message_with_system_prompt_async( ) def 
test_streaming_create_message_with_system_prompt( sentry_init, - capture_events, + capture_items, send_default_pii, include_prompts, get_model_response, @@ -2776,7 +2801,7 @@ def test_streaming_create_message_with_system_prompt( traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -2802,46 +2827,46 @@ def test_streaming_create_message_with_system_prompt( for _ in message: pass - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - assert len(event["spans"]) == 1 - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (span,) = spans - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" if send_default_pii and include_prompts: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["data"] + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["attributes"] system_instructions = json.loads( - span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] ) assert system_instructions == [ {"type": "text", "content": "You are a helpful assistant."} ] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] - stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + assert 
SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] + stored_messages = json.loads( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) assert len(stored_messages) == 1 assert stored_messages[0]["role"] == "user" assert stored_messages[0]["content"] == "Hello, Claude" - assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" else: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["data"] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["attributes"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True @pytest.mark.parametrize( @@ -2855,7 +2880,7 @@ def test_streaming_create_message_with_system_prompt( ) def test_stream_messages_with_system_prompt( sentry_init, - capture_events, + capture_items, send_default_pii, include_prompts, get_model_response, @@ -2906,7 +2931,7 @@ def test_stream_messages_with_system_prompt( traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -2930,46 +2955,46 @@ def test_stream_messages_with_system_prompt( for event in stream: pass - assert len(events) == 1 - (event,) = events - - assert 
event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - assert len(event["spans"]) == 1 - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (span,) = spans - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" if send_default_pii and include_prompts: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["data"] + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["attributes"] system_instructions = json.loads( - span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] ) assert system_instructions == [ {"type": "text", "content": "You are a helpful assistant."} ] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] - stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] + stored_messages = json.loads( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) assert len(stored_messages) == 1 assert stored_messages[0]["role"] == "user" assert stored_messages[0]["content"] == "Hello, Claude" - assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" 
else: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["data"] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["attributes"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True @pytest.mark.asyncio @@ -2984,7 +3009,7 @@ def test_stream_messages_with_system_prompt( ) async def test_stream_message_with_system_prompt_async( sentry_init, - capture_events, + capture_items, send_default_pii, include_prompts, get_model_response, @@ -3038,7 +3063,7 @@ async def test_stream_message_with_system_prompt_async( traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -3062,46 +3087,46 @@ async def test_stream_message_with_system_prompt_async( async for event in stream: pass - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - assert len(event["spans"]) == 1 - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (span,) = spans - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat model" - assert 
span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" if send_default_pii and include_prompts: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["data"] + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["attributes"] system_instructions = json.loads( - span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] ) assert system_instructions == [ {"type": "text", "content": "You are a helpful assistant."} ] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] - stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] + stored_messages = json.loads( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) assert len(stored_messages) == 1 assert stored_messages[0]["role"] == "user" assert stored_messages[0]["content"] == "Hello, Claude" - assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" 
else: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["data"] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["attributes"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True @pytest.mark.asyncio @@ -3116,7 +3141,7 @@ async def test_stream_message_with_system_prompt_async( ) async def test_streaming_create_message_with_system_prompt_async( sentry_init, - capture_events, + capture_items, send_default_pii, include_prompts, get_model_response, @@ -3170,7 +3195,7 @@ async def test_streaming_create_message_with_system_prompt_async( traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -3196,56 +3221,56 @@ async def test_streaming_create_message_with_system_prompt_async( async for _ in message: pass - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - assert len(event["spans"]) == 1 - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (span,) = spans - assert span["op"] == OP.GEN_AI_CHAT - assert 
span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" if send_default_pii and include_prompts: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["data"] + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["attributes"] system_instructions = json.loads( - span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] ) assert system_instructions == [ {"type": "text", "content": "You are a helpful assistant."} ] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] - stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] + stored_messages = json.loads( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) assert len(stored_messages) == 1 assert stored_messages[0]["role"] == "user" assert stored_messages[0]["content"] == "Hello, Claude" - assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" 
else: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["data"] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["attributes"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True -def test_system_prompt_with_complex_structure(sentry_init, capture_events): +def test_system_prompt_with_complex_structure(sentry_init, capture_items): """Test that complex system prompt structures (list of text blocks) are properly captured.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -3268,17 +3293,18 @@ def test_system_prompt_with_complex_structure(sentry_init, capture_events): ) assert response == EXAMPLE_MESSAGE - assert len(events) == 1 - (event,) = events - assert len(event["spans"]) == 1 - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (span,) = spans - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert 
span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["data"] - system_instructions = json.loads(span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["attributes"] + system_instructions = json.loads( + span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + ) # System content should be a list of text blocks assert isinstance(system_instructions, list) @@ -3287,8 +3313,8 @@ def test_system_prompt_with_complex_structure(sentry_init, capture_events): {"type": "text", "content": "Be concise and clear."}, ] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] - stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] + stored_messages = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(stored_messages) == 1 assert stored_messages[0]["role"] == "user" @@ -3490,14 +3516,14 @@ def test_transform_message_content_list_anthropic(): # Integration tests for binary data in messages -def test_message_with_base64_image(sentry_init, capture_events): +def test_message_with_base64_image(sentry_init, capture_items): """Test that messages with base64 images are properly captured.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -3521,12 +3547,11 @@ def test_message_with_base64_image(sentry_init, capture_events): with start_transaction(name="anthropic"): client.messages.create(max_tokens=1024, messages=messages, model="model") - assert len(events) == 1 - (event,) = events - (span,) = event["spans"] + spans = [item.payload for item in items if item.type 
== "span"] + (span,) = spans - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] - stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] + stored_messages = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(stored_messages) == 1 assert stored_messages[0]["role"] == "user" @@ -3541,14 +3566,14 @@ def test_message_with_base64_image(sentry_init, capture_events): } -def test_message_with_url_image(sentry_init, capture_events): +def test_message_with_url_image(sentry_init, capture_items): """Test that messages with URL-referenced images are properly captured.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -3571,11 +3596,10 @@ def test_message_with_url_image(sentry_init, capture_events): with start_transaction(name="anthropic"): client.messages.create(max_tokens=1024, messages=messages, model="model") - assert len(events) == 1 - (event,) = events - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + (span,) = spans - stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + stored_messages = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) content = stored_messages[0]["content"] assert content[1] == { "type": "uri", @@ -3585,14 +3609,14 @@ def test_message_with_url_image(sentry_init, capture_events): } -def test_message_with_file_image(sentry_init, capture_events): +def test_message_with_file_image(sentry_init, capture_items): """Test that messages with file_id-referenced images are properly captured.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events 
= capture_events() + items = capture_items("transaction", "span") client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -3616,11 +3640,10 @@ def test_message_with_file_image(sentry_init, capture_events): with start_transaction(name="anthropic"): client.messages.create(max_tokens=1024, messages=messages, model="model") - assert len(events) == 1 - (event,) = events - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + (span,) = spans - stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + stored_messages = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) content = stored_messages[0]["content"] assert content[1] == { "type": "file", @@ -3630,14 +3653,14 @@ def test_message_with_file_image(sentry_init, capture_events): } -def test_message_with_base64_pdf(sentry_init, capture_events): +def test_message_with_base64_pdf(sentry_init, capture_items): """Test that messages with base64-encoded PDF documents are properly captured.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -3651,7 +3674,7 @@ def test_message_with_base64_pdf(sentry_init, capture_events): "source": { "type": "base64", "media_type": "application/pdf", - "data": "JVBERi0xLjQKJeLj...base64pdfdata", + "attributes": "JVBERi0xLjQKJeLj...base64pdfdata", }, }, ], @@ -3661,11 +3684,10 @@ def test_message_with_base64_pdf(sentry_init, capture_events): with start_transaction(name="anthropic"): client.messages.create(max_tokens=1024, messages=messages, model="model") - assert len(events) == 1 - (event,) = events - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + (span,) = spans - stored_messages = 
json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + stored_messages = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) content = stored_messages[0]["content"] assert content[1] == { "type": "blob", @@ -3675,14 +3697,14 @@ def test_message_with_base64_pdf(sentry_init, capture_events): } -def test_message_with_url_pdf(sentry_init, capture_events): +def test_message_with_url_pdf(sentry_init, capture_items): """Test that messages with URL-referenced PDF documents are properly captured.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -3705,11 +3727,10 @@ def test_message_with_url_pdf(sentry_init, capture_events): with start_transaction(name="anthropic"): client.messages.create(max_tokens=1024, messages=messages, model="model") - assert len(events) == 1 - (event,) = events - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + (span,) = spans - stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + stored_messages = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) content = stored_messages[0]["content"] assert content[1] == { "type": "uri", @@ -3719,14 +3740,14 @@ def test_message_with_url_pdf(sentry_init, capture_events): } -def test_message_with_file_document(sentry_init, capture_events): +def test_message_with_file_document(sentry_init, capture_items): """Test that messages with file_id-referenced documents are properly captured.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) 
@@ -3750,11 +3771,10 @@ def test_message_with_file_document(sentry_init, capture_events): with start_transaction(name="anthropic"): client.messages.create(max_tokens=1024, messages=messages, model="model") - assert len(events) == 1 - (event,) = events - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + (span,) = spans - stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + stored_messages = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) content = stored_messages[0]["content"] assert content[1] == { "type": "file", @@ -3764,14 +3784,14 @@ def test_message_with_file_document(sentry_init, capture_events): } -def test_message_with_mixed_content(sentry_init, capture_events): +def test_message_with_mixed_content(sentry_init, capture_items): """Test that messages with mixed content (text, images, documents) are properly captured.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -3785,7 +3805,7 @@ def test_message_with_mixed_content(sentry_init, capture_events): "source": { "type": "base64", "media_type": "image/png", - "data": "iVBORw0KGgo...base64imagedata", + "attributes": "iVBORw0KGgo...base64imagedata", }, }, { @@ -3800,7 +3820,7 @@ def test_message_with_mixed_content(sentry_init, capture_events): "source": { "type": "base64", "media_type": "application/pdf", - "data": "JVBERi0xLjQK...base64pdfdata", + "attributes": "JVBERi0xLjQK...base64pdfdata", }, }, {"type": "text", "text": "Please provide a detailed analysis."}, @@ -3811,11 +3831,10 @@ def test_message_with_mixed_content(sentry_init, capture_events): with start_transaction(name="anthropic"): client.messages.create(max_tokens=1024, messages=messages, model="model") - assert len(events) 
== 1 - (event,) = events - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + (span,) = spans - stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + stored_messages = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) content = stored_messages[0]["content"] assert len(content) == 5 @@ -3847,14 +3866,14 @@ def test_message_with_mixed_content(sentry_init, capture_events): } -def test_message_with_multiple_images_different_formats(sentry_init, capture_events): +def test_message_with_multiple_images_different_formats(sentry_init, capture_items): """Test that messages with multiple images of different source types are handled.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -3867,7 +3886,7 @@ def test_message_with_multiple_images_different_formats(sentry_init, capture_eve "source": { "type": "base64", "media_type": "image/jpeg", - "data": "base64data1...", + "attributes": "base64data1...", }, }, { @@ -3893,11 +3912,10 @@ def test_message_with_multiple_images_different_formats(sentry_init, capture_eve with start_transaction(name="anthropic"): client.messages.create(max_tokens=1024, messages=messages, model="model") - assert len(events) == 1 - (event,) = events - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + (span,) = spans - stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + stored_messages = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) content = stored_messages[0]["content"] assert len(content) == 4 @@ -3922,14 +3940,14 @@ def test_message_with_multiple_images_different_formats(sentry_init, capture_eve assert content[3] == {"type": "text", "text": 
"Compare these three images."} -def test_binary_content_not_stored_when_pii_disabled(sentry_init, capture_events): +def test_binary_content_not_stored_when_pii_disabled(sentry_init, capture_items): """Test that binary content is not stored when send_default_pii is False.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=False, ) - events = capture_events() + items = capture_items("transaction", "span") client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -3943,7 +3961,7 @@ def test_binary_content_not_stored_when_pii_disabled(sentry_init, capture_events "source": { "type": "base64", "media_type": "image/jpeg", - "data": "base64encodeddatahere...", + "attributes": "base64encodeddatahere...", }, }, ], @@ -3953,22 +3971,21 @@ def test_binary_content_not_stored_when_pii_disabled(sentry_init, capture_events with start_transaction(name="anthropic"): client.messages.create(max_tokens=1024, messages=messages, model="model") - assert len(events) == 1 - (event,) = events - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + (span,) = spans # Messages should not be stored - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] -def test_binary_content_not_stored_when_prompts_disabled(sentry_init, capture_events): +def test_binary_content_not_stored_when_prompts_disabled(sentry_init, capture_items): """Test that binary content is not stored when include_prompts is False.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -3982,7 +3999,7 @@ def test_binary_content_not_stored_when_prompts_disabled(sentry_init, 
capture_ev "source": { "type": "base64", "media_type": "image/jpeg", - "data": "base64encodeddatahere...", + "attributes": "base64encodeddatahere...", }, }, ], @@ -3992,18 +4009,17 @@ def test_binary_content_not_stored_when_prompts_disabled(sentry_init, capture_ev with start_transaction(name="anthropic"): client.messages.create(max_tokens=1024, messages=messages, model="model") - assert len(events) == 1 - (event,) = events - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + (span,) = spans # Messages should not be stored - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] -def test_cache_tokens_nonstreaming(sentry_init, capture_events): +def test_cache_tokens_nonstreaming(sentry_init, capture_items): """Test cache read/write tokens are tracked for non-streaming responses.""" sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) - events = capture_events() + items = capture_items("transaction", "span") client = Anthropic(api_key="z") client.messages._post = mock.Mock( @@ -4029,16 +4045,16 @@ def test_cache_tokens_nonstreaming(sentry_init, capture_events): model="claude-3-5-sonnet-20241022", ) - (span,) = events[0]["spans"] + (span,) = [item.payload for item in items if item.type == "span"] # input_tokens normalized: 100 + 80 (cache_read) + 20 (cache_write) = 200 - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 200 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 50 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 250 - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 80 - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 200 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 50 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 250 + assert 
span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 80 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 20 -def test_input_tokens_include_cache_write_nonstreaming(sentry_init, capture_events): +def test_input_tokens_include_cache_write_nonstreaming(sentry_init, capture_items): """ Test that gen_ai.usage.input_tokens includes cache_write tokens (non-streaming). @@ -4051,7 +4067,7 @@ def test_input_tokens_include_cache_write_nonstreaming(sentry_init, capture_even cache_creation_input_tokens=2846, cache_read_input_tokens=0) """ sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) - events = capture_events() + items = capture_items("transaction", "span") client = Anthropic(api_key="z") client.messages._post = mock.Mock( @@ -4077,16 +4093,16 @@ def test_input_tokens_include_cache_write_nonstreaming(sentry_init, capture_even model="claude-sonnet-4-20250514", ) - (span,) = events[0]["spans"] + (span,) = [item.payload for item in items if item.type == "span"] # input_tokens should be total: 19 (non-cached) + 2846 (cache_write) = 2865 - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 2865 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 2879 # 2865 + 14 - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 0 - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 2846 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 2865 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 2879 # 2865 + 14 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 0 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 2846 -def test_input_tokens_include_cache_read_nonstreaming(sentry_init, capture_events): +def test_input_tokens_include_cache_read_nonstreaming(sentry_init, capture_items): """ Test that gen_ai.usage.input_tokens includes cache_read tokens (non-streaming). 
@@ -4099,7 +4115,7 @@ def test_input_tokens_include_cache_read_nonstreaming(sentry_init, capture_event cache_creation_input_tokens=0, cache_read_input_tokens=2846) """ sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) - events = capture_events() + items = capture_items("transaction", "span") client = Anthropic(api_key="z") client.messages._post = mock.Mock( @@ -4125,18 +4141,18 @@ def test_input_tokens_include_cache_read_nonstreaming(sentry_init, capture_event model="claude-sonnet-4-20250514", ) - (span,) = events[0]["spans"] + (span,) = [item.payload for item in items if item.type == "span"] # input_tokens should be total: 19 (non-cached) + 2846 (cache_read) = 2865 - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 2865 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 2879 # 2865 + 14 - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 2846 - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 0 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 2865 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 2879 # 2865 + 14 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 2846 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 0 def test_input_tokens_include_cache_read_streaming( sentry_init, - capture_events, + capture_items, get_model_response, server_side_event_chunks, ): @@ -4176,7 +4192,7 @@ def test_input_tokens_include_cache_read_streaming( ) sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) - events = capture_events() + items = capture_items("transaction", "span") with mock.patch.object( client._client, @@ -4192,18 +4208,18 @@ def test_input_tokens_include_cache_read_streaming( ): pass - (span,) = events[0]["spans"] + (span,) = [item.payload for item in items if item.type == "span"] # input_tokens should be total: 19 + 2846 = 
test_stream_messages_input_tokens_include_cache_read_streaming - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 2865 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 2879 # 2865 + 14 - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 2846 - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 0 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 2865 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 2879 # 2865 + 14 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 2846 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 0 def test_stream_messages_input_tokens_include_cache_read_streaming( sentry_init, - capture_events, + capture_items, get_model_response, server_side_event_chunks, ): @@ -4242,7 +4258,7 @@ def test_stream_messages_input_tokens_include_cache_read_streaming( ) sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) - events = capture_events() + items = capture_items("transaction", "span") with mock.patch.object( client._client, @@ -4258,16 +4274,16 @@ def test_stream_messages_input_tokens_include_cache_read_streaming( for event in stream: pass - (span,) = events[0]["spans"] + (span,) = [item.payload for item in items if item.type == "span"] # input_tokens should be total: 19 + 2846 = 2865 - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 2865 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 2879 # 2865 + 14 - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 2846 - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 0 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 2865 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 2879 # 2865 + 14 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 2846 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 0 -def 
test_input_tokens_unchanged_without_caching(sentry_init, capture_events): +def test_input_tokens_unchanged_without_caching(sentry_init, capture_items): """ Test that input_tokens is unchanged when there are no cached tokens. @@ -4275,7 +4291,7 @@ def test_input_tokens_unchanged_without_caching(sentry_init, capture_events): Usage(input_tokens=20, output_tokens=12) """ sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) - events = capture_events() + items = capture_items("transaction", "span") client = Anthropic(api_key="z") client.messages._post = mock.Mock( @@ -4299,15 +4315,15 @@ def test_input_tokens_unchanged_without_caching(sentry_init, capture_events): model="claude-sonnet-4-20250514", ) - (span,) = events[0]["spans"] + (span,) = [item.payload for item in items if item.type == "span"] - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 20 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 32 # 20 + 12 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 32 # 20 + 12 def test_cache_tokens_streaming( sentry_init, - capture_events, + capture_items, get_model_response, server_side_event_chunks, ): @@ -4343,7 +4359,7 @@ def test_cache_tokens_streaming( ) sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) - events = capture_events() + items = capture_items("transaction", "span") with mock.patch.object( client._client, @@ -4359,17 +4375,17 @@ def test_cache_tokens_streaming( ): pass - (span,) = events[0]["spans"] + (span,) = [item.payload for item in items if item.type == "span"] # input_tokens normalized: 100 + 80 (cache_read) + 20 (cache_write) = 200 - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 200 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 210 - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 80 - 
assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 200 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 210 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 80 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 20 def test_stream_messages_cache_tokens( - sentry_init, capture_events, get_model_response, server_side_event_chunks + sentry_init, capture_items, get_model_response, server_side_event_chunks ): """Test cache tokens are tracked for streaming responses.""" client = Anthropic(api_key="z") @@ -4403,7 +4419,7 @@ def test_stream_messages_cache_tokens( ) sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) - events = capture_events() + items = capture_items("transaction", "span") with mock.patch.object( client._client, @@ -4419,10 +4435,10 @@ def test_stream_messages_cache_tokens( for event in stream: pass - (span,) = events[0]["spans"] + (span,) = [item.payload for item in items if item.type == "span"] # input_tokens normalized: 100 + 80 (cache_read) + 20 (cache_write) = 200 - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 200 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 210 - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 80 - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 200 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 210 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 80 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 20 From 
f44316dfa45f83d02e7f65908340aeeadcfbe70f Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Thu, 16 Apr 2026 15:24:52 +0200 Subject: [PATCH 12/84] google-genai tests --- .../google_genai/test_google_genai.py | 507 +++++++++--------- 1 file changed, 248 insertions(+), 259 deletions(-) diff --git a/tests/integrations/google_genai/test_google_genai.py b/tests/integrations/google_genai/test_google_genai.py index 6e91ba6634..e074b79c8c 100644 --- a/tests/integrations/google_genai/test_google_genai.py +++ b/tests/integrations/google_genai/test_google_genai.py @@ -124,14 +124,14 @@ def create_test_config( ], ) def test_nonstreaming_generate_content( - sentry_init, capture_events, send_default_pii, include_prompts, mock_genai_client + sentry_init, capture_items, send_default_pii, include_prompts, mock_genai_client ): sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("transaction", "span") # Mock the HTTP response at the _api_client.request() level mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -146,38 +146,37 @@ def test_nonstreaming_generate_content( mock_genai_client.models.generate_content( model="gemini-1.5-flash", contents="Tell me a joke", config=config ) - assert len(events) == 1 - (event,) = events - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "google_genai" - assert len(event["spans"]) == 1 - chat_span = event["spans"][0] + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + chat_span = next(item.payload for item in items if item.type == "span") # Check chat span - assert chat_span["op"] == OP.GEN_AI_CHAT - assert chat_span["description"] == "chat gemini-1.5-flash" - assert chat_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert 
chat_span["data"][SPANDATA.GEN_AI_SYSTEM] == "gcp.gemini" - assert chat_span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "gemini-1.5-flash" + assert chat_span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert chat_span["name"] == "chat gemini-1.5-flash" + assert chat_span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert chat_span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "gcp.gemini" + assert chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "gemini-1.5-flash" if send_default_pii and include_prompts: # Response text is stored as a JSON array - response_text = chat_span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + response_text = chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] # Parse the JSON array response_texts = json.loads(response_text) assert response_texts == ["Hello! How can I help you today?"] else: - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_span["attributes"] # Check token usage - assert chat_span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert chat_span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 # Output tokens now include reasoning tokens: candidates_token_count (20) + thoughts_token_count (3) = 23 - assert chat_span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 23 - assert chat_span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 - assert chat_span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 5 - assert chat_span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS_REASONING] == 3 + assert chat_span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 23 + assert chat_span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert chat_span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 5 + assert chat_span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS_REASONING] == 3 @pytest.mark.parametrize("generate_content_config", (False, True)) @@ -210,7 +209,7 @@ def test_nonstreaming_generate_content( ) def 
test_generate_content_with_system_instruction( sentry_init, - capture_events, + capture_items, mock_genai_client, generate_content_config, system_instructions, @@ -221,7 +220,7 @@ def test_generate_content_with_system_instruction( traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("span") mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -243,16 +242,15 @@ def test_generate_content_with_system_instruction( config=config, ) - (event,) = events - invoke_span = event["spans"][0] + invoke_span = next(item.payload for item in items if item.type == "span") if expected_texts is None: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in invoke_span["data"] + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in invoke_span["attributes"] return # (PII is enabled and include_prompts is True in this test) system_instructions = json.loads( - invoke_span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + invoke_span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] ) assert system_instructions == [ @@ -260,12 +258,12 @@ def test_generate_content_with_system_instruction( ] -def test_generate_content_with_tools(sentry_init, capture_events, mock_genai_client): +def test_generate_content_with_tools(sentry_init, capture_items, mock_genai_client): sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("span") # Create a mock tool function def get_weather(location: str) -> str: @@ -319,18 +317,17 @@ def get_weather(location: str) -> str: model="gemini-1.5-flash", contents="What's the weather?", config=config ) - (event,) = events - invoke_span = event["spans"][0] + invoke_span = next(item.payload for item in items if item.type == "span") # Check that tools are recorded (data is serialized as a string) - tools_data_str = invoke_span["data"][SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS] + tools_data_str = 
invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS] # Parse the JSON string to verify content tools_data = json.loads(tools_data_str) assert len(tools_data) == 2 # The order of tools may not be guaranteed, so sort by name and description for comparison sorted_tools = sorted( - tools_data, key=lambda t: (t.get("name", ""), t.get("description", "")) + tools_data, key=lambda t: (t.get("name", ""), t.get("name", "")) ) # The function tool @@ -342,13 +339,13 @@ def get_weather(location: str) -> str: assert sorted_tools[1]["description"] == "Get weather information (tool object)" -def test_tool_execution(sentry_init, capture_events): +def test_tool_execution(sentry_init, capture_items): sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("span") # Create a mock tool function def get_weather(location: str) -> str: @@ -366,25 +363,25 @@ def get_weather(location: str) -> str: assert result == "The weather in San Francisco is sunny" - (event,) = events - assert len(event["spans"]) == 1 - tool_span = event["spans"][0] + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + tool_span = next(item.payload for item in items if item.type == "span") - assert tool_span["op"] == OP.GEN_AI_EXECUTE_TOOL - assert tool_span["description"] == "execute_tool get_weather" - assert tool_span["data"][SPANDATA.GEN_AI_TOOL_NAME] == "get_weather" + assert tool_span["attributes"]["sentry.op"] == OP.GEN_AI_EXECUTE_TOOL + assert tool_span["name"] == "execute_tool get_weather" + assert tool_span["attributes"][SPANDATA.GEN_AI_TOOL_NAME] == "get_weather" assert ( - tool_span["data"][SPANDATA.GEN_AI_TOOL_DESCRIPTION] + tool_span["attributes"][SPANDATA.GEN_AI_TOOL_DESCRIPTION] == "Get the weather for a location" ) -def test_error_handling(sentry_init, capture_events, mock_genai_client): +def test_error_handling(sentry_init, 
capture_items, mock_genai_client): sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("event", "transaction") # Mock an error at the HTTP level with mock.patch.object( @@ -399,8 +396,8 @@ def test_error_handling(sentry_init, capture_events, mock_genai_client): ) # Should have both transaction and error events - assert len(events) == 2 - error_event, transaction_event = events + assert len([item for item in items if item.type == "transaction"]) == 1 + (error_event,) = (item.payload for item in items if item.type == "event") assert error_event["level"] == "error" assert error_event["exception"]["values"][0]["type"] == "Exception" @@ -408,14 +405,14 @@ def test_error_handling(sentry_init, capture_events, mock_genai_client): assert error_event["exception"]["values"][0]["mechanism"]["type"] == "google_genai" -def test_streaming_generate_content(sentry_init, capture_events, mock_genai_client): +def test_streaming_generate_content(sentry_init, capture_items, mock_genai_client): """Test streaming with generate_content_stream, verifying chunk accumulation.""" sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("span") # Create streaming chunks - simulating a multi-chunk response # Chunk 1: First part of text with partial usage metadata @@ -497,40 +494,41 @@ def test_streaming_generate_content(sentry_init, capture_events, mock_genai_clie assert collected_chunks[1].candidates[0].content.parts[0].text == "How can I " assert collected_chunks[2].candidates[0].content.parts[0].text == "help you today?" 
- (event,) = events - - assert len(event["spans"]) == 1 - chat_span = event["spans"][0] + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + chat_span = next(item.payload for item in items if item.type == "span") # Check that streaming flag is set on both spans - assert chat_span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True # Verify accumulated response text (all chunks combined) expected_full_text = "Hello! How can I help you today?" # Response text is stored as a JSON string - chat_response_text = json.loads(chat_span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT]) + chat_response_text = json.loads( + chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] + ) assert chat_response_text == [expected_full_text] # Verify finish reasons (only the final chunk has a finish reason) # When there's a single finish reason, it's stored as a plain string (not JSON) - assert SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS in chat_span["data"] - assert chat_span["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == "STOP" - assert chat_span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert chat_span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 - assert chat_span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 25 - assert chat_span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 5 - assert chat_span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS_REASONING] == 3 + assert SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS in chat_span["attributes"] + assert chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == "STOP" + assert chat_span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert chat_span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 + assert chat_span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 25 + assert chat_span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 5 + assert 
chat_span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS_REASONING] == 3 # Verify model name - assert chat_span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "gemini-1.5-flash" + assert chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "gemini-1.5-flash" -def test_span_origin(sentry_init, capture_events, mock_genai_client): +def test_span_origin(sentry_init, capture_items, mock_genai_client): sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("span", "transaction") mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -543,22 +541,21 @@ def test_span_origin(sentry_init, capture_events, mock_genai_client): model="gemini-1.5-flash", contents="Test origin", config=config ) - (event,) = events - + (event,) = (item.payload for item in items if item.type == "transaction") assert event["contexts"]["trace"]["origin"] == "manual" - for span in event["spans"]: - assert span["origin"] == "auto.ai.google_genai" + spans = [item.payload for item in items if item.type == "span"] + for span in spans: + assert span["attributes"]["sentry.origin"] == "auto.ai.google_genai" -def test_response_without_usage_metadata( - sentry_init, capture_events, mock_genai_client -): + +def test_response_without_usage_metadata(sentry_init, capture_items, mock_genai_client): """Test handling of responses without usage metadata""" sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("span") # Response without usage metadata response_json = { @@ -584,23 +581,22 @@ def test_response_without_usage_metadata( model="gemini-1.5-flash", contents="Test", config=config ) - (event,) = events - chat_span = event["spans"][0] + chat_span = next(item.payload for item in items if item.type == "span") # Usage data should not be present - assert SPANDATA.GEN_AI_USAGE_INPUT_TOKENS not in chat_span["data"] - assert 
SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS not in chat_span["data"] - assert SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS not in chat_span["data"] + assert SPANDATA.GEN_AI_USAGE_INPUT_TOKENS not in chat_span["attributes"] + assert SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS not in chat_span["attributes"] + assert SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS not in chat_span["attributes"] -def test_multiple_candidates(sentry_init, capture_events, mock_genai_client): +def test_multiple_candidates(sentry_init, capture_items, mock_genai_client): """Test handling of multiple response candidates""" sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("span") # Response with multiple candidates multi_candidate_json = { @@ -638,12 +634,11 @@ def test_multiple_candidates(sentry_init, capture_events, mock_genai_client): model="gemini-1.5-flash", contents="Generate multiple", config=config ) - (event,) = events - chat_span = event["spans"][0] + chat_span = next(item.payload for item in items if item.type == "span") # Should capture all responses # Response text is stored as a JSON string when there are multiple responses - response_text = chat_span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + response_text = chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] if isinstance(response_text, str) and response_text.startswith("["): # It's a JSON array response_list = json.loads(response_text) @@ -654,18 +649,18 @@ def test_multiple_candidates(sentry_init, capture_events, mock_genai_client): # Finish reasons are serialized as JSON finish_reasons = json.loads( - chat_span["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] + chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] ) assert finish_reasons == ["STOP", "MAX_TOKENS"] -def test_all_configuration_parameters(sentry_init, capture_events, mock_genai_client): +def test_all_configuration_parameters(sentry_init, capture_items, 
mock_genai_client): """Test that all configuration parameters are properly recorded""" sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("span") mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -686,26 +681,25 @@ def test_all_configuration_parameters(sentry_init, capture_events, mock_genai_cl model="gemini-1.5-flash", contents="Test all params", config=config ) - (event,) = events - invoke_span = event["spans"][0] + invoke_span = next(item.payload for item in items if item.type == "span") # Check all parameters are recorded - assert invoke_span["data"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.8 - assert invoke_span["data"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.95 - assert invoke_span["data"][SPANDATA.GEN_AI_REQUEST_TOP_K] == 40 - assert invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 2048 - assert invoke_span["data"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 - assert invoke_span["data"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 - assert invoke_span["data"][SPANDATA.GEN_AI_REQUEST_SEED] == 12345 + assert invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.8 + assert invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.95 + assert invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_K] == 40 + assert invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 2048 + assert invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 + assert invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 + assert invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_SEED] == 12345 -def test_empty_response(sentry_init, capture_events, mock_genai_client): +def test_empty_response(sentry_init, capture_items, mock_genai_client): """Test handling of minimal response with no content""" sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, ) - events = 
capture_events() + items = capture_items("span") # Minimal response with empty candidates array minimal_response_json = {"candidates": []} @@ -723,20 +717,20 @@ def test_empty_response(sentry_init, capture_events, mock_genai_client): assert response is not None assert len(response.candidates) == 0 - (event,) = events # Should still create spans even with empty candidates - assert len(event["spans"]) == 1 + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 def test_response_with_different_id_fields( - sentry_init, capture_events, mock_genai_client + sentry_init, capture_items, mock_genai_client ): """Test handling of different response ID field names""" sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("span") # Response with response_id and model_version response_json = { @@ -763,20 +757,21 @@ def test_response_with_different_id_fields( model="gemini-1.5-flash", contents="Test", config=create_test_config() ) - (event,) = events - chat_span = event["spans"][0] + chat_span = next(item.payload for item in items if item.type == "span") - assert chat_span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "resp-456" - assert chat_span["data"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "gemini-1.5-flash-001" + assert chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] == "resp-456" + assert ( + chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_MODEL] + == "gemini-1.5-flash-001" + ) -def test_tool_with_async_function(sentry_init, capture_events): +def test_tool_with_async_function(sentry_init): """Test that async tool functions are properly wrapped""" sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, ) - capture_events() # Create an async tool function async def async_tool(param: str) -> str: @@ -792,14 +787,14 @@ async def async_tool(param: str) -> str: assert hasattr(wrapped_async_tool, "__wrapped__") # Should preserve original -def 
test_contents_as_none(sentry_init, capture_events, mock_genai_client): +def test_contents_as_none(sentry_init, capture_items, mock_genai_client): """Test handling when contents parameter is None""" sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("span") mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -811,22 +806,21 @@ def test_contents_as_none(sentry_init, capture_events, mock_genai_client): model="gemini-1.5-flash", contents=None, config=create_test_config() ) - (event,) = events - invoke_span = event["spans"][0] + invoke_span = next(item.payload for item in items if item.type == "span") # Should handle None contents gracefully - messages = invoke_span["data"].get(SPANDATA.GEN_AI_REQUEST_MESSAGES, []) + messages = invoke_span["attributes"].get(SPANDATA.GEN_AI_REQUEST_MESSAGES, []) # Should only have system message if any, not user message assert all(msg["role"] != "user" or msg["content"] is not None for msg in messages) -def test_tool_calls_extraction(sentry_init, capture_events, mock_genai_client): +def test_tool_calls_extraction(sentry_init, capture_items, mock_genai_client): """Test extraction of tool/function calls from response""" sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("span") # Response with function calls function_call_response_json = { @@ -875,14 +869,17 @@ def test_tool_calls_extraction(sentry_init, capture_events, mock_genai_client): config=create_test_config(), ) - (event,) = events - chat_span = event["spans"][0] # The chat span + chat_span = next( + item.payload for item in items if item.type == "span" + ) # The chat span # Check that tool calls are extracted and stored - assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in chat_span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in chat_span["attributes"] 
# Parse the JSON string to verify content - tool_calls = json.loads(chat_span["data"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS]) + tool_calls = json.loads( + chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS] + ) assert len(tool_calls) == 2 @@ -902,16 +899,14 @@ def test_tool_calls_extraction(sentry_init, capture_events, mock_genai_client): assert json.loads(tool_calls[1]["arguments"]) == {"timezone": "PST"} -def test_google_genai_message_truncation( - sentry_init, capture_events, mock_genai_client -): +def test_google_genai_message_truncation(sentry_init, capture_items, mock_genai_client): """Test that large messages are truncated properly in Google GenAI integration.""" sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("span") large_content = ( "This is a very long message that will exceed our size limits. " * 1000 @@ -930,11 +925,10 @@ def test_google_genai_message_truncation( config=create_test_config(), ) - (event,) = events - invoke_span = event["spans"][0] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["data"] + invoke_span = next(item.payload for item in items if item.type == "span") + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["attributes"] - messages_data = invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + messages_data = invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] assert isinstance(messages_data, str) parsed_messages = json.loads(messages_data) @@ -980,14 +974,14 @@ def test_google_genai_message_truncation( ], ) def test_embed_content( - sentry_init, capture_events, send_default_pii, include_prompts, mock_genai_client + sentry_init, capture_items, send_default_pii, include_prompts, mock_genai_client ): sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = 
capture_items("transaction", "span") # Mock the HTTP response at the _api_client.request() level mock_http_response = create_mock_http_response(EXAMPLE_EMBED_RESPONSE_JSON) @@ -1006,47 +1000,49 @@ def test_embed_content( ], ) - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "google_genai_embeddings" # Should have 1 span for embeddings - assert len(event["spans"]) == 1 - (embed_span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (embed_span,) = spans # Check embeddings span - assert embed_span["op"] == OP.GEN_AI_EMBEDDINGS - assert embed_span["description"] == "embeddings text-embedding-004" - assert embed_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings" - assert embed_span["data"][SPANDATA.GEN_AI_SYSTEM] == "gcp.gemini" - assert embed_span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "text-embedding-004" + assert embed_span["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS + assert embed_span["name"] == "embeddings text-embedding-004" + assert embed_span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings" + assert embed_span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "gcp.gemini" + assert ( + embed_span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "text-embedding-004" + ) # Check input texts if PII is allowed if send_default_pii and include_prompts: - input_texts = json.loads(embed_span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) + input_texts = json.loads( + embed_span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + ) assert input_texts == [ "What is your name?", "What is your favorite color?", ] else: - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in embed_span["data"] + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in embed_span["attributes"] # Check usage data (sum of token counts from statistics: 10 + 15 = 25) # Note: Only available 
in newer versions with ContentEmbeddingStatistics - if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["data"]: - assert embed_span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 25 + if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["attributes"]: + assert embed_span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 25 -def test_embed_content_string_input(sentry_init, capture_events, mock_genai_client): +def test_embed_content_string_input(sentry_init, capture_items, mock_genai_client): """Test embed_content with a single string instead of list.""" sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("span") # Mock response with single embedding single_embed_response = { @@ -1074,25 +1070,25 @@ def test_embed_content_string_input(sentry_init, capture_events, mock_genai_clie contents="Single text input", ) - (event,) = events - (embed_span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + (embed_span,) = spans # Check that single string is handled correctly - input_texts = json.loads(embed_span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) + input_texts = json.loads(embed_span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) assert input_texts == ["Single text input"] # Should use token_count from statistics (5), not billable_character_count (10) # Note: Only available in newer versions with ContentEmbeddingStatistics - if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["data"]: - assert embed_span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 5 + if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["attributes"]: + assert embed_span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 5 -def test_embed_content_error_handling(sentry_init, capture_events, mock_genai_client): +def test_embed_content_error_handling(sentry_init, capture_items, mock_genai_client): """Test error handling in embed_content.""" 
sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction", "event") # Mock an error at the HTTP level with mock.patch.object( @@ -1108,8 +1104,8 @@ def test_embed_content_error_handling(sentry_init, capture_events, mock_genai_cl ) # Should have both transaction and error events - assert len(events) == 2 - error_event, _ = events + assert len([item for item in items if item.type == "transaction"]) == 1 + (error_event,) = (item.payload for item in items if item.type == "event") assert error_event["level"] == "error" assert error_event["exception"]["values"][0]["type"] == "Exception" @@ -1118,14 +1114,14 @@ def test_embed_content_error_handling(sentry_init, capture_events, mock_genai_cl def test_embed_content_without_statistics( - sentry_init, capture_events, mock_genai_client + sentry_init, capture_items, mock_genai_client ): """Test embed_content response without statistics (older package versions).""" sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("span") # Response without statistics (typical for older google-genai versions) # Embeddings exist but don't have the statistics field @@ -1150,21 +1146,21 @@ def test_embed_content_without_statistics( contents=["Test without statistics", "Another test"], ) - (event,) = events - (embed_span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + (embed_span,) = spans # No usage tokens since there are no statistics in older versions # This is expected and the integration should handle it gracefully - assert SPANDATA.GEN_AI_USAGE_INPUT_TOKENS not in embed_span["data"] + assert SPANDATA.GEN_AI_USAGE_INPUT_TOKENS not in embed_span["attributes"] -def test_embed_content_span_origin(sentry_init, capture_events, mock_genai_client): +def test_embed_content_span_origin(sentry_init, capture_items, mock_genai_client): """Test that 
embed_content spans have correct origin.""" sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction", "span") mock_http_response = create_mock_http_response(EXAMPLE_EMBED_RESPONSE_JSON) @@ -1177,11 +1173,12 @@ def test_embed_content_span_origin(sentry_init, capture_events, mock_genai_clien contents=["Test origin"], ) - (event,) = events - + (event,) = (item.payload for item in items if item.type == "transaction") assert event["contexts"]["trace"]["origin"] == "manual" - for span in event["spans"]: - assert span["origin"] == "auto.ai.google_genai" + + spans = [item.payload for item in items if item.type == "span"] + for span in spans: + assert span["attributes"]["sentry.origin"] == "auto.ai.google_genai" @pytest.mark.asyncio @@ -1195,7 +1192,7 @@ def test_embed_content_span_origin(sentry_init, capture_events, mock_genai_clien ], ) async def test_async_embed_content( - sentry_init, capture_events, send_default_pii, include_prompts, mock_genai_client + sentry_init, capture_items, send_default_pii, include_prompts, mock_genai_client ): """Test async embed_content method.""" sentry_init( @@ -1203,7 +1200,7 @@ async def test_async_embed_content( traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("transaction", "span") # Mock the async HTTP response mock_http_response = create_mock_http_response(EXAMPLE_EMBED_RESPONSE_JSON) @@ -1222,42 +1219,44 @@ async def test_async_embed_content( ], ) - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "google_genai_embeddings_async" # Should have 1 span for embeddings - assert len(event["spans"]) == 1 - (embed_span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (embed_span,) = spans 
# Check embeddings span - assert embed_span["op"] == OP.GEN_AI_EMBEDDINGS - assert embed_span["description"] == "embeddings text-embedding-004" - assert embed_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings" - assert embed_span["data"][SPANDATA.GEN_AI_SYSTEM] == "gcp.gemini" - assert embed_span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "text-embedding-004" + assert embed_span["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS + assert embed_span["name"] == "embeddings text-embedding-004" + assert embed_span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings" + assert embed_span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "gcp.gemini" + assert ( + embed_span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "text-embedding-004" + ) # Check input texts if PII is allowed if send_default_pii and include_prompts: - input_texts = json.loads(embed_span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) + input_texts = json.loads( + embed_span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + ) assert input_texts == [ "What is your name?", "What is your favorite color?", ] else: - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in embed_span["data"] + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in embed_span["attributes"] # Check usage data (sum of token counts from statistics: 10 + 15 = 25) # Note: Only available in newer versions with ContentEmbeddingStatistics - if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["data"]: - assert embed_span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 25 + if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["attributes"]: + assert embed_span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 25 @pytest.mark.asyncio async def test_async_embed_content_string_input( - sentry_init, capture_events, mock_genai_client + sentry_init, capture_items, mock_genai_client ): """Test async embed_content with a single string instead of list.""" sentry_init( @@ -1265,7 +1264,7 @@ async def test_async_embed_content_string_input( 
traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("span") # Mock response with single embedding single_embed_response = { @@ -1293,28 +1292,28 @@ async def test_async_embed_content_string_input( contents="Single text input", ) - (event,) = events - (embed_span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + (embed_span,) = spans # Check that single string is handled correctly - input_texts = json.loads(embed_span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) + input_texts = json.loads(embed_span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) assert input_texts == ["Single text input"] # Should use token_count from statistics (5), not billable_character_count (10) # Note: Only available in newer versions with ContentEmbeddingStatistics - if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["data"]: - assert embed_span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 5 + if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["attributes"]: + assert embed_span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 5 @pytest.mark.asyncio async def test_async_embed_content_error_handling( - sentry_init, capture_events, mock_genai_client + sentry_init, capture_items, mock_genai_client ): """Test error handling in async embed_content.""" sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction", "event") # Mock an error at the HTTP level with mock.patch.object( @@ -1330,8 +1329,8 @@ async def test_async_embed_content_error_handling( ) # Should have both transaction and error events - assert len(events) == 2 - error_event, _ = events + assert len([item for item in items if item.type == "transaction"]) == 1 + (error_event,) = (item.payload for item in items if item.type == "event") assert error_event["level"] == "error" assert error_event["exception"]["values"][0]["type"] == "Exception" @@ -1341,14 +1340,14 
@@ async def test_async_embed_content_error_handling( @pytest.mark.asyncio async def test_async_embed_content_without_statistics( - sentry_init, capture_events, mock_genai_client + sentry_init, capture_items, mock_genai_client ): """Test async embed_content response without statistics (older package versions).""" sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("span") # Response without statistics (typical for older google-genai versions) # Embeddings exist but don't have the statistics field @@ -1373,24 +1372,24 @@ async def test_async_embed_content_without_statistics( contents=["Test without statistics", "Another test"], ) - (event,) = events - (embed_span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + (embed_span,) = spans # No usage tokens since there are no statistics in older versions # This is expected and the integration should handle it gracefully - assert SPANDATA.GEN_AI_USAGE_INPUT_TOKENS not in embed_span["data"] + assert SPANDATA.GEN_AI_USAGE_INPUT_TOKENS not in embed_span["attributes"] @pytest.mark.asyncio async def test_async_embed_content_span_origin( - sentry_init, capture_events, mock_genai_client + sentry_init, capture_items, mock_genai_client ): """Test that async embed_content spans have correct origin.""" sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction", "span") mock_http_response = create_mock_http_response(EXAMPLE_EMBED_RESPONSE_JSON) @@ -1403,16 +1402,17 @@ async def test_async_embed_content_span_origin( contents=["Test origin"], ) - (event,) = events - + (event,) = [item.payload for item in items if item.type == "transaction"] assert event["contexts"]["trace"]["origin"] == "manual" - for span in event["spans"]: - assert span["origin"] == "auto.ai.google_genai" + + spans = [item.payload for item in items if item.type == 
"span"] + for span in spans: + assert span["attributes"]["sentry.origin"] == "auto.ai.google_genai" # Integration tests for generate_content with different input message formats def test_generate_content_with_content_object( - sentry_init, capture_events, mock_genai_client + sentry_init, capture_items, mock_genai_client ): """Test generate_content with Content object input.""" sentry_init( @@ -1420,7 +1420,7 @@ def test_generate_content_with_content_object( traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("span") mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -1437,10 +1437,9 @@ def test_generate_content_with_content_object( model="gemini-1.5-flash", contents=content, config=create_test_config() ) - (event,) = events - invoke_span = event["spans"][0] + invoke_span = next(item.payload for item in items if item.type == "span") - messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + messages = json.loads(invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(messages) == 1 assert messages[0]["role"] == "user" assert messages[0]["content"] == [ @@ -1449,7 +1448,7 @@ def test_generate_content_with_content_object( def test_generate_content_with_dict_format( - sentry_init, capture_events, mock_genai_client + sentry_init, capture_items, mock_genai_client ): """Test generate_content with dict format input (ContentDict).""" sentry_init( @@ -1457,7 +1456,7 @@ def test_generate_content_with_dict_format( traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("span") mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -1472,10 +1471,9 @@ def test_generate_content_with_dict_format( model="gemini-1.5-flash", contents=contents, config=create_test_config() ) - (event,) = events - invoke_span = event["spans"][0] + invoke_span = next(item.payload for item in items if item.type == "span") - 
messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + messages = json.loads(invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(messages) == 1 assert messages[0]["role"] == "user" assert messages[0]["content"] == [ @@ -1483,16 +1481,14 @@ def test_generate_content_with_dict_format( ] -def test_generate_content_with_file_data( - sentry_init, capture_events, mock_genai_client -): +def test_generate_content_with_file_data(sentry_init, capture_items, mock_genai_client): """Test generate_content with file_data (external file reference).""" sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("span") mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -1516,10 +1512,9 @@ def test_generate_content_with_file_data( model="gemini-1.5-flash", contents=content, config=create_test_config() ) - (event,) = events - invoke_span = event["spans"][0] + invoke_span = next(item.payload for item in items if item.type == "span") - messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + messages = json.loads(invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(messages) == 1 assert messages[0]["role"] == "user" assert len(messages[0]["content"]) == 2 @@ -1534,7 +1529,7 @@ def test_generate_content_with_file_data( def test_generate_content_with_inline_data( - sentry_init, capture_events, mock_genai_client + sentry_init, capture_items, mock_genai_client ): """Test generate_content with inline_data (binary data).""" sentry_init( @@ -1542,7 +1537,7 @@ def test_generate_content_with_inline_data( traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("span") mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -1565,10 +1560,9 @@ def test_generate_content_with_inline_data( 
model="gemini-1.5-flash", contents=content, config=create_test_config() ) - (event,) = events - invoke_span = event["spans"][0] + invoke_span = next(item.payload for item in items if item.type == "span") - messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + messages = json.loads(invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(messages) == 1 assert messages[0]["role"] == "user" assert len(messages[0]["content"]) == 2 @@ -1580,7 +1574,7 @@ def test_generate_content_with_inline_data( def test_generate_content_with_function_response( - sentry_init, capture_events, mock_genai_client + sentry_init, capture_items, mock_genai_client ): """Test generate_content with function_response (tool result).""" sentry_init( @@ -1588,7 +1582,7 @@ def test_generate_content_with_function_response( traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("span") mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -1622,10 +1616,9 @@ def test_generate_content_with_function_response( model="gemini-1.5-flash", contents=contents, config=create_test_config() ) - (event,) = events - invoke_span = event["spans"][0] + invoke_span = next(item.payload for item in items if item.type == "span") - messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + messages = json.loads(invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(messages) == 1 # First message is user message assert messages[0]["role"] == "tool" @@ -1635,7 +1628,7 @@ def test_generate_content_with_function_response( def test_generate_content_with_mixed_string_and_content( - sentry_init, capture_events, mock_genai_client + sentry_init, capture_items, mock_genai_client ): """Test generate_content with mixed string and Content objects in list.""" sentry_init( @@ -1643,7 +1636,7 @@ def test_generate_content_with_mixed_string_and_content( traces_sample_rate=1.0, 
send_default_pii=True, ) - events = capture_events() + items = capture_items("span") mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -1668,10 +1661,9 @@ def test_generate_content_with_mixed_string_and_content( model="gemini-1.5-flash", contents=contents, config=create_test_config() ) - (event,) = events - invoke_span = event["spans"][0] + invoke_span = next(item.payload for item in items if item.type == "span") - messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + messages = json.loads(invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(messages) == 1 # User message assert messages[0]["role"] == "user" @@ -1679,7 +1671,7 @@ def test_generate_content_with_mixed_string_and_content( def test_generate_content_with_part_object_directly( - sentry_init, capture_events, mock_genai_client + sentry_init, capture_items, mock_genai_client ): """Test generate_content with Part object directly (not wrapped in Content).""" sentry_init( @@ -1687,7 +1679,7 @@ def test_generate_content_with_part_object_directly( traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("span") mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -1702,17 +1694,16 @@ def test_generate_content_with_part_object_directly( model="gemini-1.5-flash", contents=part, config=create_test_config() ) - (event,) = events - invoke_span = event["spans"][0] + invoke_span = next(item.payload for item in items if item.type == "span") - messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + messages = json.loads(invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(messages) == 1 assert messages[0]["role"] == "user" assert messages[0]["content"] == [{"text": "Direct Part object", "type": "text"}] def test_generate_content_with_list_of_dicts( - sentry_init, capture_events, mock_genai_client + sentry_init, capture_items, mock_genai_client 
): """ Test generate_content with list of dict format inputs. @@ -1726,7 +1717,7 @@ def test_generate_content_with_list_of_dicts( traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("span") mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -1745,17 +1736,16 @@ def test_generate_content_with_list_of_dicts( model="gemini-1.5-flash", contents=contents, config=create_test_config() ) - (event,) = events - invoke_span = event["spans"][0] + invoke_span = next(item.payload for item in items if item.type == "span") - messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + messages = json.loads(invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(messages) == 1 assert messages[0]["role"] == "user" assert messages[0]["content"] == [{"text": "Second user message", "type": "text"}] def test_generate_content_with_dict_inline_data( - sentry_init, capture_events, mock_genai_client + sentry_init, capture_items, mock_genai_client ): """Test generate_content with dict format containing inline_data.""" sentry_init( @@ -1763,7 +1753,7 @@ def test_generate_content_with_dict_inline_data( traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("span") mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -1784,10 +1774,9 @@ def test_generate_content_with_dict_inline_data( model="gemini-1.5-flash", contents=contents, config=create_test_config() ) - (event,) = events - invoke_span = event["spans"][0] + invoke_span = next(item.payload for item in items if item.type == "span") - messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + messages = json.loads(invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(messages) == 1 assert messages[0]["role"] == "user" assert len(messages[0]["content"]) == 2 @@ -1801,14 +1790,14 @@ def test_generate_content_with_dict_inline_data( 
def test_generate_content_without_parts_property_inline_data( - sentry_init, capture_events, mock_genai_client + sentry_init, capture_items, mock_genai_client ): sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("span") mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -1825,10 +1814,9 @@ def test_generate_content_without_parts_property_inline_data( model="gemini-1.5-flash", contents=contents, config=create_test_config() ) - (event,) = events - invoke_span = event["spans"][0] + invoke_span = next(item.payload for item in items if item.type == "span") - messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + messages = json.loads(invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(messages) == 1 @@ -1845,14 +1833,14 @@ def test_generate_content_without_parts_property_inline_data( def test_generate_content_without_parts_property_inline_data_and_binary_data_within_string( - sentry_init, capture_events, mock_genai_client + sentry_init, capture_items, mock_genai_client ): sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("span") mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -1874,10 +1862,9 @@ def test_generate_content_without_parts_property_inline_data_and_binary_data_wit model="gemini-1.5-flash", contents=contents, config=create_test_config() ) - (event,) = events - invoke_span = event["spans"][0] + invoke_span = next(item.payload for item in items if item.type == "span") - messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + messages = json.loads(invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(messages) == 1 assert messages[0]["role"] == "user" @@ -2162,7 +2149,9 @@ def 
test_extract_contents_messages_dict_inline_data(): """Test extract_contents_messages with dict containing inline_data""" content_dict = { "role": "user", - "parts": [{"inline_data": {"data": b"binary_data", "mime_type": "image/gif"}}], + "parts": [ + {"inline_data": {"attributes": b"binary_data", "mime_type": "image/gif"}} + ], } result = extract_contents_messages(content_dict) From ff9c5ec2f2eac0a7fa94b49b40cdd31e172c053f Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 17 Apr 2026 09:52:20 +0200 Subject: [PATCH 13/84] test litellm --- tests/integrations/litellm/test_litellm.py | 477 +++++++++++---------- 1 file changed, 241 insertions(+), 236 deletions(-) diff --git a/tests/integrations/litellm/test_litellm.py b/tests/integrations/litellm/test_litellm.py index a8df5891ce..90807744e7 100644 --- a/tests/integrations/litellm/test_litellm.py +++ b/tests/integrations/litellm/test_litellm.py @@ -142,7 +142,7 @@ def __init__( def test_nonstreaming_chat_completion( reset_litellm_executor, sentry_init, - capture_events, + capture_items, send_default_pii, include_prompts, get_model_response, @@ -153,7 +153,7 @@ def test_nonstreaming_chat_completion( traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [{"role": "user", "content": "Hello!"}] @@ -179,37 +179,36 @@ def test_nonstreaming_chat_completion( litellm_utils.executor.shutdown(wait=True) - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "litellm test" + spans = [item.payload for item in items if item.type == "span"] chat_spans = list( x - for x in event["spans"] - if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) assert 
len(chat_spans) == 1 span = chat_spans[0] - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat gpt-3.5-turbo" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "gpt-3.5-turbo" - assert span["data"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "gpt-3.5-turbo" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat gpt-3.5-turbo" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "gpt-3.5-turbo" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "gpt-3.5-turbo" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" if send_default_pii and include_prompts: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT in span["attributes"] else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 @pytest.mark.asyncio(loop_scope="session") @@ -224,7 +223,7 @@ def test_nonstreaming_chat_completion( ) async def test_async_nonstreaming_chat_completion( sentry_init, - capture_events, + capture_items, send_default_pii, include_prompts, get_model_response, @@ 
-235,7 +234,7 @@ async def test_async_nonstreaming_chat_completion( traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [{"role": "user", "content": "Hello!"}] @@ -262,37 +261,36 @@ async def test_async_nonstreaming_chat_completion( await GLOBAL_LOGGING_WORKER.flush() await asyncio.sleep(0.5) - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "litellm test" + spans = [item.payload for item in items if item.type == "span"] chat_spans = list( x - for x in event["spans"] - if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) assert len(chat_spans) == 1 span = chat_spans[0] - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat gpt-3.5-turbo" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "gpt-3.5-turbo" - assert span["data"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "gpt-3.5-turbo" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat gpt-3.5-turbo" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "gpt-3.5-turbo" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "gpt-3.5-turbo" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" if send_default_pii and include_prompts: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT in span["attributes"] else: 
- assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 @pytest.mark.parametrize( @@ -307,7 +305,7 @@ async def test_async_nonstreaming_chat_completion( def test_streaming_chat_completion( reset_litellm_executor, sentry_init, - capture_events, + capture_items, send_default_pii, include_prompts, get_model_response, @@ -319,7 +317,7 @@ def test_streaming_chat_completion( traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("span") messages = [{"role": "user", "content": "Hello!"}] @@ -350,20 +348,18 @@ def test_streaming_chat_completion( streaming_handler.executor.shutdown(wait=True) - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + spans = [item.payload for item in items if item.type == "span"] chat_spans = list( x - for x in event["spans"] - if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) assert len(chat_spans) == 1 span = chat_spans[0] - assert span["op"] == OP.GEN_AI_CHAT - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True @pytest.mark.asyncio(loop_scope="session") @@ -378,7 +374,7 @@ 
def test_streaming_chat_completion( ) async def test_async_streaming_chat_completion( sentry_init, - capture_events, + capture_items, send_default_pii, include_prompts, get_model_response, @@ -391,7 +387,7 @@ async def test_async_streaming_chat_completion( traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [{"role": "user", "content": "Hello!"}] @@ -425,25 +421,23 @@ async def test_async_streaming_chat_completion( await GLOBAL_LOGGING_WORKER.flush() await asyncio.sleep(0.5) - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + spans = [item.payload for item in items if item.type == "span"] chat_spans = list( x - for x in event["spans"] - if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) assert len(chat_spans) == 1 span = chat_spans[0] - assert span["op"] == OP.GEN_AI_CHAT - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True def test_embeddings_create( sentry_init, - capture_events, + capture_items, get_model_response, openai_embedding_model_response, clear_litellm_cache, @@ -459,7 +453,7 @@ def test_embeddings_create( traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") client = OpenAI(api_key="test-key") @@ -485,32 +479,34 @@ def test_embeddings_create( # Response is processed by litellm, so just check it exists assert response is not None - assert len(events) == 1 - (event,) = events - assert event["type"] == "transaction" + spans = [item.payload for item in items if item.type == "span"] spans = list( x - for x in event["spans"] - if x["op"] == OP.GEN_AI_EMBEDDINGS and 
x["origin"] == "auto.ai.litellm" + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) assert len(spans) == 1 span = spans[0] - assert span["op"] == OP.GEN_AI_EMBEDDINGS - assert span["description"] == "embeddings text-embedding-ada-002" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings" - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 5 - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "text-embedding-ada-002" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS + assert span["name"] == "embeddings text-embedding-ada-002" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings" + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 5 + assert ( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] + == "text-embedding-ada-002" + ) # Check that embeddings input is captured (it's JSON serialized) - embeddings_input = span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + embeddings_input = span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] assert json.loads(embeddings_input) == ["Hello, world!"] @pytest.mark.asyncio(loop_scope="session") async def test_async_embeddings_create( sentry_init, - capture_events, + capture_items, get_model_response, openai_embedding_model_response, clear_litellm_cache, @@ -526,7 +522,7 @@ async def test_async_embeddings_create( traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") client = AsyncOpenAI(api_key="test-key") @@ -553,31 +549,33 @@ async def test_async_embeddings_create( # Response is processed by litellm, so just check it exists assert response is not None - assert len(events) == 1 - (event,) = events - assert event["type"] == "transaction" + spans = [item.payload for item in items if item.type == "span"] spans = list( x - for x in event["spans"] - if x["op"] == OP.GEN_AI_EMBEDDINGS and x["origin"] == 
"auto.ai.litellm" + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) assert len(spans) == 1 span = spans[0] - assert span["op"] == OP.GEN_AI_EMBEDDINGS - assert span["description"] == "embeddings text-embedding-ada-002" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings" - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 5 - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "text-embedding-ada-002" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS + assert span["name"] == "embeddings text-embedding-ada-002" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings" + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 5 + assert ( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] + == "text-embedding-ada-002" + ) # Check that embeddings input is captured (it's JSON serialized) - embeddings_input = span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + embeddings_input = span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] assert json.loads(embeddings_input) == ["Hello, world!"] def test_embeddings_create_with_list_input( sentry_init, - capture_events, + capture_items, get_model_response, openai_embedding_model_response, clear_litellm_cache, @@ -588,7 +586,7 @@ def test_embeddings_create_with_list_input( traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") client = OpenAI(api_key="test-key") @@ -614,22 +612,21 @@ def test_embeddings_create_with_list_input( # Response is processed by litellm, so just check it exists assert response is not None - assert len(events) == 1 - (event,) = events - assert event["type"] == "transaction" + spans = [item.payload for item in items if item.type == "span"] spans = list( x - for x in event["spans"] - if x["op"] == OP.GEN_AI_EMBEDDINGS and x["origin"] == "auto.ai.litellm" + for x in spans + if 
x["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) assert len(spans) == 1 span = spans[0] - assert span["op"] == OP.GEN_AI_EMBEDDINGS - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings" # Check that list of embeddings input is captured (it's JSON serialized) - embeddings_input = span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + embeddings_input = span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] assert json.loads(embeddings_input) == [ "First text", "Second text", @@ -640,7 +637,7 @@ def test_embeddings_create_with_list_input( @pytest.mark.asyncio(loop_scope="session") async def test_async_embeddings_create_with_list_input( sentry_init, - capture_events, + capture_items, get_model_response, openai_embedding_model_response, clear_litellm_cache, @@ -651,7 +648,7 @@ async def test_async_embeddings_create_with_list_input( traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") client = AsyncOpenAI(api_key="test-key") @@ -678,22 +675,21 @@ async def test_async_embeddings_create_with_list_input( # Response is processed by litellm, so just check it exists assert response is not None - assert len(events) == 1 - (event,) = events - assert event["type"] == "transaction" + spans = [item.payload for item in items if item.type == "span"] spans = list( x - for x in event["spans"] - if x["op"] == OP.GEN_AI_EMBEDDINGS and x["origin"] == "auto.ai.litellm" + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) assert len(spans) == 1 span = spans[0] - assert span["op"] == OP.GEN_AI_EMBEDDINGS - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings" + assert span["attributes"]["sentry.op"] == 
OP.GEN_AI_EMBEDDINGS + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings" # Check that list of embeddings input is captured (it's JSON serialized) - embeddings_input = span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + embeddings_input = span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] assert json.loads(embeddings_input) == [ "First text", "Second text", @@ -703,7 +699,7 @@ async def test_async_embeddings_create_with_list_input( def test_embeddings_no_pii( sentry_init, - capture_events, + capture_items, get_model_response, openai_embedding_model_response, clear_litellm_cache, @@ -714,7 +710,7 @@ def test_embeddings_no_pii( traces_sample_rate=1.0, send_default_pii=False, # PII disabled ) - events = capture_events() + items = capture_items("transaction", "span") client = OpenAI(api_key="test-key") @@ -740,27 +736,26 @@ def test_embeddings_no_pii( # Response is processed by litellm, so just check it exists assert response is not None - assert len(events) == 1 - (event,) = events - assert event["type"] == "transaction" + spans = [item.payload for item in items if item.type == "span"] spans = list( x - for x in event["spans"] - if x["op"] == OP.GEN_AI_EMBEDDINGS and x["origin"] == "auto.ai.litellm" + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) assert len(spans) == 1 span = spans[0] - assert span["op"] == OP.GEN_AI_EMBEDDINGS + assert span["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS # Check that embeddings input is NOT captured when PII is disabled - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in span["data"] + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in span["attributes"] @pytest.mark.asyncio(loop_scope="session") async def test_async_embeddings_no_pii( sentry_init, - capture_events, + capture_items, get_model_response, openai_embedding_model_response, clear_litellm_cache, @@ -771,7 +766,7 @@ async def test_async_embeddings_no_pii( 
traces_sample_rate=1.0, send_default_pii=False, # PII disabled ) - events = capture_events() + items = capture_items("transaction", "span") client = AsyncOpenAI(api_key="test-key") @@ -798,31 +793,30 @@ async def test_async_embeddings_no_pii( # Response is processed by litellm, so just check it exists assert response is not None - assert len(events) == 1 - (event,) = events - assert event["type"] == "transaction" + spans = [item.payload for item in items if item.type == "span"] spans = list( x - for x in event["spans"] - if x["op"] == OP.GEN_AI_EMBEDDINGS and x["origin"] == "auto.ai.litellm" + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) assert len(spans) == 1 span = spans[0] - assert span["op"] == OP.GEN_AI_EMBEDDINGS + assert span["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS # Check that embeddings input is NOT captured when PII is disabled - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in span["data"] + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in span["attributes"] def test_exception_handling( - reset_litellm_executor, sentry_init, capture_events, get_rate_limit_model_response + reset_litellm_executor, sentry_init, capture_items, get_rate_limit_model_response ): sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("event") messages = [{"role": "user", "content": "Hello!"}] @@ -843,22 +837,24 @@ def test_exception_handling( client=client, ) - # Should have error event and transaction - assert len(events) >= 1 # Find the error event - error_events = [e for e in events if e.get("level") == "error"] + error_events = [ + item.payload + for item in items + if item.type == "event" and item.payload.get("level") == "error" + ] assert len(error_events) == 1 @pytest.mark.asyncio(loop_scope="session") async def test_async_exception_handling( - sentry_init, capture_events, get_rate_limit_model_response + 
sentry_init, capture_items, get_rate_limit_model_response ): sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("event") messages = [{"role": "user", "content": "Hello!"}] @@ -879,17 +875,19 @@ async def test_async_exception_handling( client=client, ) - # Should have error event and transaction - assert len(events) >= 1 # Find the error event - error_events = [e for e in events if e.get("level") == "error"] + error_events = [ + item.payload + for item in items + if item.type == "event" and item.payload.get("level") == "error" + ] assert len(error_events) == 1 def test_span_origin( reset_litellm_executor, sentry_init, - capture_events, + capture_items, get_model_response, nonstreaming_chat_completions_model_response, ): @@ -897,7 +895,7 @@ def test_span_origin( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [{"role": "user", "content": "Hello!"}] @@ -923,16 +921,17 @@ def test_span_origin( litellm_utils.executor.shutdown(wait=True) - (event,) = events - + (event,) = (item.payload for item in items if item.type == "transaction") assert event["contexts"]["trace"]["origin"] == "manual" - assert event["spans"][0]["origin"] == "auto.ai.litellm" + + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["attributes"]["sentry.origin"] == "auto.ai.litellm" def test_multiple_providers( reset_litellm_executor, sentry_init, - capture_events, + capture_items, get_model_response, nonstreaming_chat_completions_model_response, nonstreaming_anthropic_model_response, @@ -943,7 +942,7 @@ def test_multiple_providers( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction") messages = [{"role": "user", "content": "Hello!"}] @@ -1015,18 +1014,19 @@ def test_multiple_providers( 
litellm_utils.executor.shutdown(wait=True) + events = [item.payload for item in items if item.type == "transaction"] assert len(events) == 3 - for i in range(3): - span = events[i]["spans"][0] + spans = [item.payload for item in items if item.type == "span"] + for span in spans: # The provider should be detected by litellm.get_llm_provider - assert SPANDATA.GEN_AI_SYSTEM in span["data"] + assert SPANDATA.GEN_AI_SYSTEM in span["attributes"] @pytest.mark.asyncio(loop_scope="session") async def test_async_multiple_providers( sentry_init, - capture_events, + capture_items, get_model_response, nonstreaming_chat_completions_model_response, nonstreaming_anthropic_model_response, @@ -1037,7 +1037,7 @@ async def test_async_multiple_providers( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [{"role": "user", "content": "Hello!"}] @@ -1112,18 +1112,19 @@ async def test_async_multiple_providers( await GLOBAL_LOGGING_WORKER.flush() await asyncio.sleep(0.5) + events = [item.payload for item in items if item.type == "transaction"] assert len(events) == 3 - for i in range(3): - span = events[i]["spans"][0] + spans = [item.payload for item in items if item.type == "span"] + for span in spans: # The provider should be detected by litellm.get_llm_provider - assert SPANDATA.GEN_AI_SYSTEM in span["data"] + assert SPANDATA.GEN_AI_SYSTEM in span["attributes"] def test_additional_parameters( reset_litellm_executor, sentry_init, - capture_events, + capture_items, get_model_response, nonstreaming_chat_completions_model_response, ): @@ -1132,7 +1133,7 @@ def test_additional_parameters( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [{"role": "user", "content": "Hello!"}] client = OpenAI(api_key="test-key") @@ -1162,26 +1163,27 @@ def test_additional_parameters( 
litellm_utils.executor.shutdown(wait=True) - (event,) = events + spans = [item.payload for item in items if item.type == "span"] chat_spans = list( x - for x in event["spans"] - if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) assert len(chat_spans) == 1 span = chat_spans[0] - assert span["data"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 - assert span["data"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 - assert span["data"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 - assert span["data"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.5 - assert span["data"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.5 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.5 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.5 @pytest.mark.asyncio(loop_scope="session") async def test_async_additional_parameters( sentry_init, - capture_events, + capture_items, get_model_response, nonstreaming_chat_completions_model_response, ): @@ -1190,7 +1192,7 @@ async def test_async_additional_parameters( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [{"role": "user", "content": "Hello!"}] client = AsyncOpenAI(api_key="test-key") @@ -1221,26 +1223,27 @@ async def test_async_additional_parameters( await GLOBAL_LOGGING_WORKER.flush() await asyncio.sleep(0.5) - (event,) = events + spans = [item.payload for item in items if item.type == "span"] chat_spans = list( x - for x in event["spans"] - if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" + for x in spans + if 
x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) assert len(chat_spans) == 1 span = chat_spans[0] - assert span["data"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 - assert span["data"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 - assert span["data"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 - assert span["data"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.5 - assert span["data"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.5 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.5 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.5 def test_no_integration( reset_litellm_executor, sentry_init, - capture_events, + capture_items, get_model_response, nonstreaming_chat_completions_model_response, ): @@ -1248,7 +1251,7 @@ def test_no_integration( sentry_init( traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [{"role": "user", "content": "Hello!"}] client = OpenAI(api_key="test-key") @@ -1273,13 +1276,12 @@ def test_no_integration( litellm_utils.executor.shutdown(wait=True) - (event,) = events - # Should still have the transaction, but no child spans since integration is off - assert event["type"] == "transaction" + spans = [item.payload for item in items if item.type == "span"] chat_spans = list( x - for x in event["spans"] - if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) assert len(chat_spans) == 0 @@ -1287,7 +1289,7 @@ def test_no_integration( @pytest.mark.asyncio(loop_scope="session") async def test_async_no_integration( sentry_init, - 
capture_events, + capture_items, get_model_response, nonstreaming_chat_completions_model_response, ): @@ -1295,7 +1297,7 @@ async def test_async_no_integration( sentry_init( traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [{"role": "user", "content": "Hello!"}] client = AsyncOpenAI(api_key="test-key") @@ -1321,24 +1323,23 @@ async def test_async_no_integration( await GLOBAL_LOGGING_WORKER.flush() await asyncio.sleep(0.5) - (event,) = events - # Should still have the transaction, but no child spans since integration is off - assert event["type"] == "transaction" + spans = [item.payload for item in items if item.type == "span"] chat_spans = list( x - for x in event["spans"] - if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) assert len(chat_spans) == 0 -def test_response_without_usage(sentry_init, capture_events): +def test_response_without_usage(sentry_init, capture_items): """Test handling of responses without usage information.""" sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [{"role": "user", "content": "Hello!"}] @@ -1366,12 +1367,11 @@ def test_response_without_usage(sentry_init, capture_events): datetime.now(), ) - (event,) = events - (span,) = event["spans"] + (span,) = (item.payload for item in items if item.type == "span") # Span should still be created even without usage info - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat gpt-3.5-turbo" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat gpt-3.5-turbo" def test_integration_setup(sentry_init): @@ -1387,14 +1387,14 @@ def test_integration_setup(sentry_init): assert _failure_callback in (litellm.failure_callback or []) 
-def test_litellm_message_truncation(sentry_init, capture_events): +def test_litellm_message_truncation(sentry_init, capture_items): """Test that large messages are truncated properly in LiteLLM integration.""" sentry_init( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") large_content = ( "This is a very long message that will exceed our size limits. " * 1000 @@ -1422,25 +1422,24 @@ def test_litellm_message_truncation(sentry_init, capture_events): datetime.now(), ) - assert len(events) > 0 - tx = events[0] - assert tx["type"] == "transaction" - + spans = [item.payload for item in items if item.type == "span"] chat_spans = [ - span for span in tx.get("spans", []) if span.get("op") == OP.GEN_AI_CHAT + span for span in spans if span["attributes"].get("sentry.op") == OP.GEN_AI_CHAT ] assert len(chat_spans) > 0 chat_span = chat_spans[0] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["attributes"] - messages_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + messages_data = chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] assert isinstance(messages_data, str) parsed_messages = json.loads(messages_data) assert isinstance(parsed_messages, list) assert len(parsed_messages) == 1 assert "small message 5" in str(parsed_messages[0]) + + tx = next(item.payload for item in items if item.type == "transaction") assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5 @@ -1452,7 +1451,7 @@ def test_litellm_message_truncation(sentry_init, capture_events): def test_binary_content_encoding_image_url( reset_litellm_executor, sentry_init, - capture_events, + capture_items, get_model_response, nonstreaming_chat_completions_model_response, ): @@ -1461,7 +1460,7 @@ def test_binary_content_encoding_image_url( traces_sample_rate=1.0, 
send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -1498,15 +1497,16 @@ def test_binary_content_encoding_image_url( litellm_utils.executor.shutdown(wait=True) - (event,) = events + spans = [item.payload for item in items if item.type == "span"] chat_spans = list( x - for x in event["spans"] - if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) assert len(chat_spans) == 1 span = chat_spans[0] - messages_data = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + messages_data = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) blob_item = next( ( @@ -1530,7 +1530,7 @@ def test_binary_content_encoding_image_url( @pytest.mark.asyncio(loop_scope="session") async def test_async_binary_content_encoding_image_url( sentry_init, - capture_events, + capture_items, get_model_response, nonstreaming_chat_completions_model_response, ): @@ -1539,7 +1539,7 @@ async def test_async_binary_content_encoding_image_url( traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -1577,15 +1577,16 @@ async def test_async_binary_content_encoding_image_url( await GLOBAL_LOGGING_WORKER.flush() await asyncio.sleep(0.5) - (event,) = events + spans = [item.payload for item in items if item.type == "span"] chat_spans = list( x - for x in event["spans"] - if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) assert len(chat_spans) == 1 span = chat_spans[0] - messages_data = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + messages_data = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) blob_item = next( ( @@ -1609,7 
+1610,7 @@ async def test_async_binary_content_encoding_image_url( def test_binary_content_encoding_mixed_content( reset_litellm_executor, sentry_init, - capture_events, + capture_items, get_model_response, nonstreaming_chat_completions_model_response, ): @@ -1618,7 +1619,7 @@ def test_binary_content_encoding_mixed_content( traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -1656,15 +1657,16 @@ def test_binary_content_encoding_mixed_content( litellm_utils.executor.shutdown(wait=True) - (event,) = events + spans = [item.payload for item in items if item.type == "span"] chat_spans = list( x - for x in event["spans"] - if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) assert len(chat_spans) == 1 span = chat_spans[0] - messages_data = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + messages_data = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) content_items = [ item for msg in messages_data if "content" in msg for item in msg["content"] @@ -1676,7 +1678,7 @@ def test_binary_content_encoding_mixed_content( @pytest.mark.asyncio(loop_scope="session") async def test_async_binary_content_encoding_mixed_content( sentry_init, - capture_events, + capture_items, get_model_response, nonstreaming_chat_completions_model_response, ): @@ -1685,7 +1687,7 @@ async def test_async_binary_content_encoding_mixed_content( traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -1724,15 +1726,16 @@ async def test_async_binary_content_encoding_mixed_content( await GLOBAL_LOGGING_WORKER.flush() await asyncio.sleep(0.5) - (event,) = events + spans = [item.payload for item in items if item.type == "span"] chat_spans = list( x - for x in 
event["spans"] - if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) assert len(chat_spans) == 1 span = chat_spans[0] - messages_data = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + messages_data = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) content_items = [ item for msg in messages_data if "content" in msg for item in msg["content"] @@ -1744,7 +1747,7 @@ async def test_async_binary_content_encoding_mixed_content( def test_binary_content_encoding_uri_type( reset_litellm_executor, sentry_init, - capture_events, + capture_items, get_model_response, nonstreaming_chat_completions_model_response, ): @@ -1753,7 +1756,7 @@ def test_binary_content_encoding_uri_type( traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -1789,15 +1792,16 @@ def test_binary_content_encoding_uri_type( litellm_utils.executor.shutdown(wait=True) - (event,) = events + spans = [item.payload for item in items if item.type == "span"] chat_spans = list( x - for x in event["spans"] - if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) assert len(chat_spans) == 1 span = chat_spans[0] - messages_data = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + messages_data = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) uri_item = next( ( @@ -1816,7 +1820,7 @@ def test_binary_content_encoding_uri_type( @pytest.mark.asyncio(loop_scope="session") async def test_async_binary_content_encoding_uri_type( sentry_init, - capture_events, + capture_items, get_model_response, nonstreaming_chat_completions_model_response, ): @@ -1825,7 +1829,7 @@ async def 
test_async_binary_content_encoding_uri_type( traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -1862,15 +1866,16 @@ async def test_async_binary_content_encoding_uri_type( await GLOBAL_LOGGING_WORKER.flush() await asyncio.sleep(0.5) - (event,) = events + spans = [item.payload for item in items if item.type == "span"] chat_spans = list( x - for x in event["spans"] - if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) assert len(chat_spans) == 1 span = chat_spans[0] - messages_data = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + messages_data = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) uri_item = next( ( From b92ae36dcfa27debc12b7c5bcaa7793434fec187 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 17 Apr 2026 10:12:04 +0200 Subject: [PATCH 14/84] test huggingface_hub --- .../huggingface_hub/test_huggingface_hub.py | 231 +++++++++++------- 1 file changed, 139 insertions(+), 92 deletions(-) diff --git a/tests/integrations/huggingface_hub/test_huggingface_hub.py b/tests/integrations/huggingface_hub/test_huggingface_hub.py index 9dd15ca4b5..6b4402bc52 100644 --- a/tests/integrations/huggingface_hub/test_huggingface_hub.py +++ b/tests/integrations/huggingface_hub/test_huggingface_hub.py @@ -471,7 +471,7 @@ def mock_hf_chat_completion_api_streaming_tools(httpx_mock): @pytest.mark.parametrize("include_prompts", [True, False]) def test_text_generation( sentry_init: "Any", - capture_events: "Any", + capture_items: "Any", send_default_pii: "Any", include_prompts: "Any", mock_hf_text_generation_api: "Any", @@ -481,7 +481,7 @@ def test_text_generation( send_default_pii=send_default_pii, integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], ) - events = capture_events() + 
items = capture_items("transaction", "span") client = InferenceClient(model="test-model") @@ -492,23 +492,22 @@ def test_text_generation( details=True, ) - (transaction,) = events - + spans = [item.payload for item in items if item.type == "span"] span = None - for sp in transaction["spans"]: - if sp["op"].startswith("gen_ai"): + for sp in spans: + if sp["attributes"]["sentry.op"].startswith("gen_ai"): assert span is None, "there is exactly one gen_ai span" span = sp else: # there should be no other spans, just the gen_ai span # and optionally some http.client spans from talking to the hf api - assert sp["op"] == "http.client" + assert sp["attributes"]["sentry.op"] == "http.client" assert span is not None - assert span["op"] == "gen_ai.text_completion" - assert span["description"] == "text_completion test-model" - assert span["origin"] == "auto.ai.huggingface_hub" + assert span["attributes"]["sentry.op"] == "gen_ai.text_completion" + assert span["name"] == "text_completion test-model" + assert span["attributes"]["sentry.origin"] == "auto.ai.huggingface_hub" expected_data = { "gen_ai.operation.name": "text_completion", @@ -516,6 +515,14 @@ def test_text_generation( "gen_ai.response.finish_reasons": "length", "gen_ai.response.streaming": False, "gen_ai.usage.total_tokens": 10, + "sentry.environment": "production", + "sentry.op": "gen_ai.text_completion", + "sentry.origin": "auto.ai.huggingface_hub", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": "2.58.0", + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "test", "thread.id": mock.ANY, "thread.name": mock.ANY, } @@ -528,10 +535,10 @@ def test_text_generation( assert "gen_ai.request.messages" not in expected_data assert "gen_ai.response.text" not in expected_data - assert span["data"] == expected_data + assert span["attributes"] == expected_data # text generation does not set the response model - assert "gen_ai.response.model" not in span["data"] + assert 
"gen_ai.response.model" not in span["attributes"] @pytest.mark.httpx_mock(assert_all_requests_were_expected=False) @@ -539,7 +546,7 @@ def test_text_generation( @pytest.mark.parametrize("include_prompts", [True, False]) def test_text_generation_streaming( sentry_init: "Any", - capture_events: "Any", + capture_items: "Any", send_default_pii: "Any", include_prompts: "Any", mock_hf_text_generation_api_streaming: "Any", @@ -549,7 +556,7 @@ def test_text_generation_streaming( send_default_pii=send_default_pii, integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], ) - events = capture_events() + items = capture_items("transaction", "span") client = InferenceClient(model="test-model") @@ -561,23 +568,22 @@ def test_text_generation_streaming( ): pass - (transaction,) = events - + spans = [item.payload for item in items if item.type == "span"] span = None - for sp in transaction["spans"]: - if sp["op"].startswith("gen_ai"): + for sp in spans: + if sp["attributes"]["sentry.op"].startswith("gen_ai"): assert span is None, "there is exactly one gen_ai span" span = sp else: # there should be no other spans, just the gen_ai span # and optionally some http.client spans from talking to the hf api - assert sp["op"] == "http.client" + assert sp["attributes"]["sentry.op"] == "http.client" assert span is not None - assert span["op"] == "gen_ai.text_completion" - assert span["description"] == "text_completion test-model" - assert span["origin"] == "auto.ai.huggingface_hub" + assert span["attributes"]["sentry.op"] == "gen_ai.text_completion" + assert span["name"] == "text_completion test-model" + assert span["attributes"]["sentry.origin"] == "auto.ai.huggingface_hub" expected_data = { "gen_ai.operation.name": "text_completion", @@ -585,6 +591,14 @@ def test_text_generation_streaming( "gen_ai.response.finish_reasons": "length", "gen_ai.response.streaming": True, "gen_ai.usage.total_tokens": 10, + "sentry.environment": "production", + "sentry.op": 
"gen_ai.text_completion", + "sentry.origin": "auto.ai.huggingface_hub", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": "2.58.0", + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "test", "thread.id": mock.ANY, "thread.name": mock.ANY, } @@ -597,10 +611,10 @@ def test_text_generation_streaming( assert "gen_ai.request.messages" not in expected_data assert "gen_ai.response.text" not in expected_data - assert span["data"] == expected_data + assert span["attributes"] == expected_data # text generation does not set the response model - assert "gen_ai.response.model" not in span["data"] + assert "gen_ai.response.model" not in span["attributes"] @pytest.mark.httpx_mock(assert_all_requests_were_expected=False) @@ -608,7 +622,7 @@ def test_text_generation_streaming( @pytest.mark.parametrize("include_prompts", [True, False]) def test_chat_completion( sentry_init: "Any", - capture_events: "Any", + capture_items: "Any", send_default_pii: "Any", include_prompts: "Any", mock_hf_chat_completion_api: "Any", @@ -618,7 +632,7 @@ def test_chat_completion( send_default_pii=send_default_pii, integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], ) - events = capture_events() + items = capture_items("transaction", "span") client = get_hf_provider_inference_client() @@ -628,23 +642,22 @@ def test_chat_completion( stream=False, ) - (transaction,) = events - + spans = [item.payload for item in items if item.type == "span"] span = None - for sp in transaction["spans"]: - if sp["op"].startswith("gen_ai"): + for sp in spans: + if sp["attributes"]["sentry.op"].startswith("gen_ai"): assert span is None, "there is exactly one gen_ai span" span = sp else: # there should be no other spans, just the gen_ai span # and optionally some http.client spans from talking to the hf api - assert sp["op"] == "http.client" + assert sp["attributes"]["sentry.op"] == "http.client" assert span is not None - assert span["op"] == "gen_ai.chat" - 
assert span["description"] == "chat test-model" - assert span["origin"] == "auto.ai.huggingface_hub" + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["name"] == "chat test-model" + assert span["attributes"]["sentry.origin"] == "auto.ai.huggingface_hub" expected_data = { "gen_ai.operation.name": "chat", @@ -655,6 +668,14 @@ def test_chat_completion( "gen_ai.usage.input_tokens": 10, "gen_ai.usage.output_tokens": 8, "gen_ai.usage.total_tokens": 18, + "sentry.environment": "production", + "sentry.op": "gen_ai.chat", + "sentry.origin": "auto.ai.huggingface_hub", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": "2.58.0", + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "test", "thread.id": mock.ANY, "thread.name": mock.ANY, } @@ -671,7 +692,7 @@ def test_chat_completion( assert "gen_ai.request.messages" not in expected_data assert "gen_ai.response.text" not in expected_data - assert span["data"] == expected_data + assert span["attributes"] == expected_data @pytest.mark.httpx_mock(assert_all_requests_were_expected=False) @@ -679,7 +700,7 @@ def test_chat_completion( @pytest.mark.parametrize("include_prompts", [True, False]) def test_chat_completion_streaming( sentry_init: "Any", - capture_events: "Any", + capture_items: "Any", send_default_pii: "Any", include_prompts: "Any", mock_hf_chat_completion_api_streaming: "Any", @@ -689,7 +710,7 @@ def test_chat_completion_streaming( send_default_pii=send_default_pii, integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], ) - events = capture_events() + items = capture_items("transaction", "span") client = get_hf_provider_inference_client() @@ -701,23 +722,22 @@ def test_chat_completion_streaming( ) ) - (transaction,) = events - + spans = [item.payload for item in items if item.type == "span"] span = None - for sp in transaction["spans"]: - if sp["op"].startswith("gen_ai"): + for sp in spans: + if 
sp["attributes"]["sentry.op"].startswith("gen_ai"): assert span is None, "there is exactly one gen_ai span" span = sp else: # there should be no other spans, just the gen_ai span # and optionally some http.client spans from talking to the hf api - assert sp["op"] == "http.client" + assert sp["attributes"]["sentry.op"] == "http.client" assert span is not None - assert span["op"] == "gen_ai.chat" - assert span["description"] == "chat test-model" - assert span["origin"] == "auto.ai.huggingface_hub" + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["name"] == "chat test-model" + assert span["attributes"]["sentry.origin"] == "auto.ai.huggingface_hub" expected_data = { "gen_ai.operation.name": "chat", @@ -725,6 +745,14 @@ def test_chat_completion_streaming( "gen_ai.response.finish_reasons": "stop", "gen_ai.response.model": "test-model-123", "gen_ai.response.streaming": True, + "sentry.environment": "production", + "sentry.op": "gen_ai.chat", + "sentry.origin": "auto.ai.huggingface_hub", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": "2.58.0", + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "test", "thread.id": mock.ANY, "thread.name": mock.ANY, } @@ -744,15 +772,15 @@ def test_chat_completion_streaming( assert "gen_ai.request.messages" not in expected_data assert "gen_ai.response.text" not in expected_data - assert span["data"] == expected_data + assert span["attributes"] == expected_data @pytest.mark.httpx_mock(assert_all_requests_were_expected=False) def test_chat_completion_api_error( - sentry_init: "Any", capture_events: "Any", mock_hf_api_with_errors: "Any" + sentry_init: "Any", capture_items: "Any", mock_hf_api_with_errors: "Any" ) -> None: sentry_init(traces_sample_rate=1.0) - events = capture_events() + items = capture_items("event", "transaction", "span") client = get_hf_provider_inference_client() @@ -762,32 +790,29 @@ def test_chat_completion_api_error( messages=[{"role": "user", 
"content": "Hello!"}], ) - ( - error, - transaction, - ) = events - + (error,) = (item.payload for item in items if item.type == "event") assert error["exception"]["values"][0]["mechanism"]["type"] == "huggingface_hub" assert not error["exception"]["values"][0]["mechanism"]["handled"] + spans = [item.payload for item in items if item.type == "span"] span = None - for sp in transaction["spans"]: - if sp["op"].startswith("gen_ai"): + for sp in spans: + if sp["attributes"]["sentry.op"].startswith("gen_ai"): assert span is None, "there is exactly one gen_ai span" span = sp else: # there should be no other spans, just the gen_ai span # and optionally some http.client spans from talking to the hf api - assert sp["op"] == "http.client" + assert sp["attributes"]["sentry.op"] == "http.client" assert span is not None - assert span["op"] == "gen_ai.chat" - assert span["description"] == "chat test-model" - assert span["origin"] == "auto.ai.huggingface_hub" - assert span["status"] == "internal_error" - assert span.get("tags", {}).get("status") == "internal_error" + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["name"] == "chat test-model" + assert span["attributes"]["sentry.origin"] == "auto.ai.huggingface_hub" + assert span["status"] == "error" + (transaction,) = (item.payload for item in items if item.type == "transaction") assert ( error["contexts"]["trace"]["trace_id"] == transaction["contexts"]["trace"]["trace_id"] @@ -795,18 +820,26 @@ def test_chat_completion_api_error( expected_data = { "gen_ai.operation.name": "chat", "gen_ai.request.model": "test-model", + "sentry.environment": "production", + "sentry.op": "gen_ai.chat", + "sentry.origin": "auto.ai.huggingface_hub", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": "2.58.0", + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "test", "thread.id": mock.ANY, "thread.name": mock.ANY, } - assert span["data"] == expected_data + assert 
span["attributes"] == expected_data @pytest.mark.httpx_mock(assert_all_requests_were_expected=False) def test_span_status_error( - sentry_init: "Any", capture_events: "Any", mock_hf_api_with_errors: "Any" + sentry_init: "Any", capture_items: "Any", mock_hf_api_with_errors: "Any" ) -> None: sentry_init(traces_sample_rate=1.0) - events = capture_events() + items = capture_items("event", "transaction", "span") client = get_hf_provider_inference_client() @@ -816,22 +849,22 @@ def test_span_status_error( messages=[{"role": "user", "content": "Hello!"}], ) - (error, transaction) = events + (error,) = [item.payload for item in items if item.type == "event"] assert error["level"] == "error" + spans = [item.payload for item in items if item.type == "span"] span = None - for sp in transaction["spans"]: - if sp["op"].startswith("gen_ai"): + for sp in spans: + if sp["attributes"]["sentry.op"].startswith("gen_ai"): assert span is None, "there is exactly one gen_ai span" span = sp else: # there should be no other spans, just the gen_ai span # and optionally some http.client spans from talking to the hf api - assert sp["op"] == "http.client" + assert sp["attributes"]["sentry.op"] == "http.client" assert span is not None - assert span["status"] == "internal_error" - assert span["tags"]["status"] == "internal_error" + assert span["status"] == "error" @pytest.mark.httpx_mock(assert_all_requests_were_expected=False) @@ -839,7 +872,7 @@ def test_span_status_error( @pytest.mark.parametrize("include_prompts", [True, False]) def test_chat_completion_with_tools( sentry_init: "Any", - capture_events: "Any", + capture_items: "Any", send_default_pii: "Any", include_prompts: "Any", mock_hf_chat_completion_api_tools: "Any", @@ -849,7 +882,7 @@ def test_chat_completion_with_tools( send_default_pii=send_default_pii, integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], ) - events = capture_events() + items = capture_items("transaction", "span") client = 
get_hf_provider_inference_client() @@ -875,23 +908,22 @@ def test_chat_completion_with_tools( tool_choice="auto", ) - (transaction,) = events - + spans = [item.payload for item in items if item.type == "span"] span = None - for sp in transaction["spans"]: - if sp["op"].startswith("gen_ai"): + for sp in spans: + if sp["attributes"]["sentry.op"].startswith("gen_ai"): assert span is None, "there is exactly one gen_ai span" span = sp else: # there should be no other spans, just the gen_ai span # and optionally some http.client spans from talking to the hf api - assert sp["op"] == "http.client" + assert sp["attributes"]["sentry.op"] == "http.client" assert span is not None - assert span["op"] == "gen_ai.chat" - assert span["description"] == "chat test-model" - assert span["origin"] == "auto.ai.huggingface_hub" + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["name"] == "chat test-model" + assert span["attributes"]["sentry.origin"] == "auto.ai.huggingface_hub" expected_data = { "gen_ai.operation.name": "chat", @@ -902,6 +934,14 @@ def test_chat_completion_with_tools( "gen_ai.usage.input_tokens": 10, "gen_ai.usage.output_tokens": 8, "gen_ai.usage.total_tokens": 18, + "sentry.environment": "production", + "sentry.op": "gen_ai.chat", + "sentry.origin": "auto.ai.huggingface_hub", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": "2.58.0", + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "test", "thread.id": mock.ANY, "thread.name": mock.ANY, } @@ -919,7 +959,7 @@ def test_chat_completion_with_tools( assert "gen_ai.response.text" not in expected_data assert "gen_ai.response.tool_calls" not in expected_data - assert span["data"] == expected_data + assert span["attributes"] == expected_data @pytest.mark.httpx_mock(assert_all_requests_were_expected=False) @@ -927,7 +967,7 @@ def test_chat_completion_with_tools( @pytest.mark.parametrize("include_prompts", [True, False]) def 
test_chat_completion_streaming_with_tools( sentry_init: "Any", - capture_events: "Any", + capture_items: "Any", send_default_pii: "Any", include_prompts: "Any", mock_hf_chat_completion_api_streaming_tools: "Any", @@ -937,7 +977,7 @@ def test_chat_completion_streaming_with_tools( send_default_pii=send_default_pii, integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], ) - events = capture_events() + items = capture_items("transaction", "span") client = get_hf_provider_inference_client() @@ -966,23 +1006,22 @@ def test_chat_completion_streaming_with_tools( ) ) - (transaction,) = events - + spans = [item.payload for item in items if item.type == "span"] span = None - for sp in transaction["spans"]: - if sp["op"].startswith("gen_ai"): + for sp in spans: + if sp["attributes"]["sentry.op"].startswith("gen_ai"): assert span is None, "there is exactly one gen_ai span" span = sp else: # there should be no other spans, just the gen_ai span # and optionally some http.client spans from talking to the hf api - assert sp["op"] == "http.client" + assert sp["attributes"]["sentry.op"] == "http.client" assert span is not None - assert span["op"] == "gen_ai.chat" - assert span["description"] == "chat test-model" - assert span["origin"] == "auto.ai.huggingface_hub" + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["name"] == "chat test-model" + assert span["attributes"]["sentry.origin"] == "auto.ai.huggingface_hub" expected_data = { "gen_ai.operation.name": "chat", @@ -991,6 +1030,14 @@ def test_chat_completion_streaming_with_tools( "gen_ai.response.finish_reasons": "tool_calls", "gen_ai.response.model": "test-model-123", "gen_ai.response.streaming": True, + "sentry.environment": "production", + "sentry.op": "gen_ai.chat", + "sentry.origin": "auto.ai.huggingface_hub", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": "2.58.0", + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "test", 
"thread.id": mock.ANY, "thread.name": mock.ANY, } @@ -1014,4 +1061,4 @@ def test_chat_completion_streaming_with_tools( assert "gen_ai.response.text" not in expected_data assert "gen_ai.response.tool_calls" not in expected_data - assert span["data"] == expected_data + assert span["attributes"] == expected_data From 907ca1d981ac652ce8e31015f5addd4af04316c1 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 17 Apr 2026 10:31:41 +0200 Subject: [PATCH 15/84] test langchain --- .../integrations/langchain/test_langchain.py | 590 ++++++++++-------- 1 file changed, 319 insertions(+), 271 deletions(-) diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index 498a5d6f4a..f709d12129 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -97,7 +97,7 @@ def _llm_type(self) -> str: def test_langchain_text_completion( sentry_init, - capture_events, + capture_items, get_model_response, ): sentry_init( @@ -109,7 +109,7 @@ def test_langchain_text_completion( traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") model_response = get_model_response( Completion( @@ -149,25 +149,29 @@ def test_langchain_text_completion( input_text = "What is the capital of France?" 
model.invoke(input_text, config={"run_name": "my-snazzy-pipeline"}) - tx = events[0] + tx = next(item.payload for item in items if item.type == "transaction") assert tx["type"] == "transaction" + spans = [item.payload for item in items if item.type == "span"] llm_spans = [ span - for span in tx.get("spans", []) - if span.get("op") == "gen_ai.text_completion" + for span in spans + if span["attributes"].get("sentry.op") == "gen_ai.text_completion" ] assert len(llm_spans) > 0 llm_span = llm_spans[0] - assert llm_span["description"] == "text_completion gpt-3.5-turbo" - assert llm_span["data"]["gen_ai.system"] == "openai" - assert llm_span["data"]["gen_ai.pipeline.name"] == "my-snazzy-pipeline" - assert llm_span["data"]["gen_ai.request.model"] == "gpt-3.5-turbo" - assert llm_span["data"]["gen_ai.response.text"] == "The capital of France is Paris." - assert llm_span["data"]["gen_ai.usage.total_tokens"] == 25 - assert llm_span["data"]["gen_ai.usage.input_tokens"] == 10 - assert llm_span["data"]["gen_ai.usage.output_tokens"] == 15 + assert llm_span["name"] == "text_completion gpt-3.5-turbo" + assert llm_span["attributes"]["gen_ai.system"] == "openai" + assert llm_span["attributes"]["gen_ai.pipeline.name"] == "my-snazzy-pipeline" + assert llm_span["attributes"]["gen_ai.request.model"] == "gpt-3.5-turbo" + assert ( + llm_span["attributes"]["gen_ai.response.text"] + == "The capital of France is Paris." 
+ ) + assert llm_span["attributes"]["gen_ai.usage.total_tokens"] == 25 + assert llm_span["attributes"]["gen_ai.usage.input_tokens"] == 10 + assert llm_span["attributes"]["gen_ai.usage.output_tokens"] == 15 @pytest.mark.skipif( @@ -196,7 +200,7 @@ def test_langchain_text_completion( ) def test_langchain_create_agent( sentry_init, - capture_events, + capture_items, send_default_pii, include_prompts, system_instructions_content, @@ -213,7 +217,7 @@ def test_langchain_create_agent( traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("transaction", "span") model_response = get_model_response( nonstreaming_responses_model_response, @@ -250,22 +254,23 @@ def test_langchain_create_agent( }, ) - tx = events[0] + tx = next(item.payload for item in items if item.type == "transaction") assert tx["type"] == "transaction" assert tx["contexts"]["trace"]["origin"] == "manual" - chat_spans = list(x for x in tx["spans"] if x["op"] == "gen_ai.chat") + spans = [item.payload for item in items if item.type == "span"] + chat_spans = list(x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.chat") assert len(chat_spans) == 1 - assert chat_spans[0]["origin"] == "auto.ai.langchain" + assert chat_spans[0]["attributes"]["sentry.origin"] == "auto.ai.langchain" - assert chat_spans[0]["data"]["gen_ai.system"] == "openai-chat" - assert chat_spans[0]["data"]["gen_ai.usage.input_tokens"] == 10 - assert chat_spans[0]["data"]["gen_ai.usage.output_tokens"] == 20 - assert chat_spans[0]["data"]["gen_ai.usage.total_tokens"] == 30 + assert chat_spans[0]["attributes"]["gen_ai.system"] == "openai-chat" + assert chat_spans[0]["attributes"]["gen_ai.usage.input_tokens"] == 10 + assert chat_spans[0]["attributes"]["gen_ai.usage.output_tokens"] == 20 + assert chat_spans[0]["attributes"]["gen_ai.usage.total_tokens"] == 30 if send_default_pii and include_prompts: assert ( - chat_spans[0]["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + 
chat_spans[0]["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hello, how can I help you?" ) @@ -276,7 +281,9 @@ def test_langchain_create_agent( "type": "text", "content": "You are very powerful assistant, but don't know current events", } - ] == json.loads(chat_spans[0]["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) + ] == json.loads( + chat_spans[0]["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + ) else: assert [ { @@ -287,11 +294,17 @@ def test_langchain_create_agent( "type": "text", "content": "Be concise and clear.", }, - ] == json.loads(chat_spans[0]["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) + ] == json.loads( + chat_spans[0]["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + ) else: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_spans[0].get("data", {}) - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[0].get("data", {}) - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[0].get("data", {}) + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_spans[0].get( + "attributes", {} + ) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[0].get( + "attributes", {} + ) + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[0].get("attributes", {}) @pytest.mark.skipif( @@ -309,7 +322,7 @@ def test_langchain_create_agent( ) def test_tool_execution_span( sentry_init, - capture_events, + capture_items, send_default_pii, include_prompts, get_model_response, @@ -324,7 +337,7 @@ def test_tool_execution_span( traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("transaction", "span") responses = responses_tool_call_model_responses( tool_name="get_word_length", @@ -400,60 +413,71 @@ def test_tool_execution_span( }, ) - tx = events[0] + tx = next(item.payload for item in items if item.type == "transaction") assert tx["type"] == "transaction" assert tx["contexts"]["trace"]["origin"] == "manual" - chat_spans = list(x for x in tx["spans"] if x["op"] == "gen_ai.chat") + 
spans = [item.payload for item in items if item.type == "span"] + chat_spans = list(x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.chat") assert len(chat_spans) == 2 - tool_exec_spans = list(x for x in tx["spans"] if x["op"] == "gen_ai.execute_tool") + tool_exec_spans = list( + x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.execute_tool" + ) assert len(tool_exec_spans) == 1 tool_exec_span = tool_exec_spans[0] - assert chat_spans[0]["origin"] == "auto.ai.langchain" - assert chat_spans[1]["origin"] == "auto.ai.langchain" - assert tool_exec_span["origin"] == "auto.ai.langchain" + assert chat_spans[0]["attributes"]["sentry.origin"] == "auto.ai.langchain" + assert chat_spans[1]["attributes"]["sentry.origin"] == "auto.ai.langchain" + assert tool_exec_span["attributes"]["sentry.origin"] == "auto.ai.langchain" - assert chat_spans[0]["data"]["gen_ai.usage.input_tokens"] == 142 - assert chat_spans[0]["data"]["gen_ai.usage.output_tokens"] == 50 - assert chat_spans[0]["data"]["gen_ai.usage.total_tokens"] == 192 - assert chat_spans[0]["data"]["gen_ai.system"] == "openai-chat" + assert chat_spans[0]["attributes"]["gen_ai.usage.input_tokens"] == 142 + assert chat_spans[0]["attributes"]["gen_ai.usage.output_tokens"] == 50 + assert chat_spans[0]["attributes"]["gen_ai.usage.total_tokens"] == 192 + assert chat_spans[0]["attributes"]["gen_ai.system"] == "openai-chat" - assert chat_spans[1]["data"]["gen_ai.usage.input_tokens"] == 89 - assert chat_spans[1]["data"]["gen_ai.usage.output_tokens"] == 28 - assert chat_spans[1]["data"]["gen_ai.usage.total_tokens"] == 117 - assert chat_spans[1]["data"]["gen_ai.system"] == "openai-chat" + assert chat_spans[1]["attributes"]["gen_ai.usage.input_tokens"] == 89 + assert chat_spans[1]["attributes"]["gen_ai.usage.output_tokens"] == 28 + assert chat_spans[1]["attributes"]["gen_ai.usage.total_tokens"] == 117 + assert chat_spans[1]["attributes"]["gen_ai.system"] == "openai-chat" if send_default_pii and include_prompts: - assert 
"word" in tool_exec_span["data"][SPANDATA.GEN_AI_TOOL_INPUT] + assert "word" in tool_exec_span["attributes"][SPANDATA.GEN_AI_TOOL_INPUT] - assert "5" in chat_spans[1]["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + assert "5" in chat_spans[1]["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] # Verify tool calls are recorded when PII is enabled - assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in chat_spans[0].get("data", {}), ( + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in chat_spans[0].get( + "attributes", {} + ), ( "Tool calls should be recorded when send_default_pii=True and include_prompts=True" ) - tool_calls_data = chat_spans[0]["data"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS] + tool_calls_data = chat_spans[0]["attributes"][ + SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS + ] assert isinstance(tool_calls_data, str) assert "get_word_length" in tool_calls_data else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[0].get("data", {}) - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[0].get("data", {}) - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[1].get("data", {}) - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[1].get("data", {}) - assert SPANDATA.GEN_AI_TOOL_INPUT not in tool_exec_span.get("data", {}) - assert SPANDATA.GEN_AI_TOOL_OUTPUT not in tool_exec_span.get("data", {}) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[0].get( + "attributes", {} + ) + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[0].get("attributes", {}) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[1].get( + "attributes", {} + ) + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[1].get("attributes", {}) + assert SPANDATA.GEN_AI_TOOL_INPUT not in tool_exec_span.get("attributes", {}) + assert SPANDATA.GEN_AI_TOOL_OUTPUT not in tool_exec_span.get("attributes", {}) # Verify tool calls are NOT recorded when PII is disabled assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in chat_spans[0].get( - "data", {} + "attributes", {} ), ( f"Tool calls 
should NOT be recorded when send_default_pii={send_default_pii} " f"and include_prompts={include_prompts}" ) assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in chat_spans[1].get( - "data", {} + "attributes", {} ), ( f"Tool calls should NOT be recorded when send_default_pii={send_default_pii} " f"and include_prompts={include_prompts}" @@ -461,7 +485,7 @@ def test_tool_execution_span( # Verify that available tools are always recorded regardless of PII settings for chat_span in chat_spans: - tools_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS] + tools_data = chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS] assert "get_word_length" in tools_data @@ -488,7 +512,7 @@ def test_tool_execution_span( ) def test_langchain_openai_tools_agent( sentry_init, - capture_events, + capture_items, send_default_pii, include_prompts, system_instructions_content, @@ -505,7 +529,7 @@ def test_langchain_openai_tools_agent( traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("transaction", "span") prompt = ChatPromptTemplate.from_messages( [ @@ -700,40 +724,47 @@ def test_langchain_openai_tools_agent( with start_transaction(): list(agent_executor.stream({"input": "How many letters in the word eudca"})) - tx = events[0] + tx = next(item.payload for item in items if item.type == "transaction") assert tx["type"] == "transaction" assert tx["contexts"]["trace"]["origin"] == "manual" - invoke_agent_span = next(x for x in tx["spans"] if x["op"] == "gen_ai.invoke_agent") - chat_spans = list(x for x in tx["spans"] if x["op"] == "gen_ai.chat") - tool_exec_span = next(x for x in tx["spans"] if x["op"] == "gen_ai.execute_tool") + spans = [item.payload for item in items if item.type == "span"] + invoke_agent_span = next( + x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.invoke_agent" + ) + chat_spans = list(x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.chat") + tool_exec_span = next( 
+ x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.execute_tool" + ) assert len(chat_spans) == 2 - assert invoke_agent_span["origin"] == "auto.ai.langchain" - assert chat_spans[0]["origin"] == "auto.ai.langchain" - assert chat_spans[1]["origin"] == "auto.ai.langchain" - assert tool_exec_span["origin"] == "auto.ai.langchain" + assert invoke_agent_span["attributes"]["sentry.origin"] == "auto.ai.langchain" + assert chat_spans[0]["attributes"]["sentry.origin"] == "auto.ai.langchain" + assert chat_spans[1]["attributes"]["sentry.origin"] == "auto.ai.langchain" + assert tool_exec_span["attributes"]["sentry.origin"] == "auto.ai.langchain" # We can't guarantee anything about the "shape" of the langchain execution graph - assert len(list(x for x in tx["spans"] if x["op"] == "gen_ai.chat")) > 0 + assert ( + len(list(x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.chat")) > 0 + ) # Token usage is only available in newer versions of langchain (v0.2+) # where usage_metadata is supported on AIMessageChunk - if "gen_ai.usage.input_tokens" in chat_spans[0]["data"]: - assert chat_spans[0]["data"]["gen_ai.usage.input_tokens"] == 142 - assert chat_spans[0]["data"]["gen_ai.usage.output_tokens"] == 50 - assert chat_spans[0]["data"]["gen_ai.usage.total_tokens"] == 192 + if "gen_ai.usage.input_tokens" in chat_spans[0]["attributes"]: + assert chat_spans[0]["attributes"]["gen_ai.usage.input_tokens"] == 142 + assert chat_spans[0]["attributes"]["gen_ai.usage.output_tokens"] == 50 + assert chat_spans[0]["attributes"]["gen_ai.usage.total_tokens"] == 192 - if "gen_ai.usage.input_tokens" in chat_spans[1]["data"]: - assert chat_spans[1]["data"]["gen_ai.usage.input_tokens"] == 89 - assert chat_spans[1]["data"]["gen_ai.usage.output_tokens"] == 28 - assert chat_spans[1]["data"]["gen_ai.usage.total_tokens"] == 117 + if "gen_ai.usage.input_tokens" in chat_spans[1]["attributes"]: + assert chat_spans[1]["attributes"]["gen_ai.usage.input_tokens"] == 89 + assert 
chat_spans[1]["attributes"]["gen_ai.usage.output_tokens"] == 28 + assert chat_spans[1]["attributes"]["gen_ai.usage.total_tokens"] == 117 if send_default_pii and include_prompts: - assert "5" in chat_spans[0]["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] - assert "word" in tool_exec_span["data"][SPANDATA.GEN_AI_TOOL_INPUT] - assert 5 == int(tool_exec_span["data"][SPANDATA.GEN_AI_TOOL_OUTPUT]) + assert "5" in chat_spans[0]["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] + assert "word" in tool_exec_span["attributes"][SPANDATA.GEN_AI_TOOL_INPUT] + assert 5 == int(tool_exec_span["attributes"][SPANDATA.GEN_AI_TOOL_OUTPUT]) param_id = request.node.callspec.id if "string" in param_id: @@ -742,7 +773,9 @@ def test_langchain_openai_tools_agent( "type": "text", "content": "You are very powerful assistant, but don't know current events", } - ] == json.loads(chat_spans[0]["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) + ] == json.loads( + chat_spans[0]["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + ) else: assert [ { @@ -753,15 +786,21 @@ def test_langchain_openai_tools_agent( "type": "text", "content": "Be concise and clear.", }, - ] == json.loads(chat_spans[0]["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) + ] == json.loads( + chat_spans[0]["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + ) - assert "5" in chat_spans[1]["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + assert "5" in chat_spans[1]["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] # Verify tool calls are recorded when PII is enabled - assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in chat_spans[0].get("data", {}), ( + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in chat_spans[0].get( + "attributes", {} + ), ( "Tool calls should be recorded when send_default_pii=True and include_prompts=True" ) - tool_calls_data = chat_spans[0]["data"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS] + tool_calls_data = chat_spans[0]["attributes"][ + SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS + ] assert isinstance(tool_calls_data, (list, str)) # Could be serialized if 
isinstance(tool_calls_data, str): assert "get_word_length" in tool_calls_data @@ -770,45 +809,55 @@ def test_langchain_openai_tools_agent( tool_call_str = str(tool_calls_data) assert "get_word_length" in tool_call_str else: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_spans[0].get("data", {}) - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[0].get("data", {}) - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[0].get("data", {}) - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_spans[1].get("data", {}) - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[1].get("data", {}) - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[1].get("data", {}) - assert SPANDATA.GEN_AI_TOOL_INPUT not in tool_exec_span.get("data", {}) - assert SPANDATA.GEN_AI_TOOL_OUTPUT not in tool_exec_span.get("data", {}) + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_spans[0].get( + "attributes", {} + ) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[0].get( + "attributes", {} + ) + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[0].get("attributes", {}) + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_spans[1].get( + "attributes", {} + ) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[1].get( + "attributes", {} + ) + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[1].get("attributes", {}) + assert SPANDATA.GEN_AI_TOOL_INPUT not in tool_exec_span.get("attributes", {}) + assert SPANDATA.GEN_AI_TOOL_OUTPUT not in tool_exec_span.get("attributes", {}) # Verify tool calls are NOT recorded when PII is disabled assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in chat_spans[0].get( - "data", {} + "attributes", {} ), ( f"Tool calls should NOT be recorded when send_default_pii={send_default_pii} " f"and include_prompts={include_prompts}" ) assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in chat_spans[1].get( - "data", {} + "attributes", {} ), ( f"Tool calls should NOT be recorded when 
send_default_pii={send_default_pii} " f"and include_prompts={include_prompts}" ) # Verify finish_reasons is always an array of strings - assert chat_spans[0]["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == [ + assert chat_spans[0]["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == [ "function_call" ] - assert chat_spans[1]["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["stop"] + assert chat_spans[1]["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == [ + "stop" + ] # Verify that available tools are always recorded regardless of PII settings for chat_span in chat_spans: - tools_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS] + tools_data = chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS] assert tools_data is not None, ( "Available tools should always be recorded regardless of PII settings" ) assert "get_word_length" in tools_data -def test_langchain_error(sentry_init, capture_events): +def test_langchain_error(sentry_init, capture_items): global llm_type llm_type = "acme-llm" @@ -817,7 +866,7 @@ def test_langchain_error(sentry_init, capture_events): traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("event", "transaction", "span") prompt = ChatPromptTemplate.from_messages( [ @@ -843,11 +892,11 @@ def test_langchain_error(sentry_init, capture_events): with start_transaction(), pytest.raises(ValueError): list(agent_executor.stream({"input": "How many letters in the word eudca"})) - error = events[0] + error = next(item.payload for item in items if item.type == "event") assert error["level"] == "error" -def test_span_status_error(sentry_init, capture_events): +def test_span_status_error(sentry_init, capture_items): global llm_type llm_type = "acme-llm" @@ -855,7 +904,7 @@ def test_span_status_error(sentry_init, capture_events): integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, ) - events = capture_events() + items = 
capture_items("event", "transaction", "span") with start_transaction(name="test"): prompt = ChatPromptTemplate.from_messages( @@ -884,10 +933,13 @@ def test_span_status_error(sentry_init, capture_events): with pytest.raises(ValueError): list(agent_executor.stream({"input": "How many letters in the word eudca"})) - (error, transaction) = events + error = next(item.payload for item in items if item.type == "event") assert error["level"] == "error" - assert transaction["spans"][0]["status"] == "internal_error" - assert transaction["spans"][0]["tags"]["status"] == "internal_error" + + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["status"] == "error" + + (transaction,) = (item.payload for item in items if item.type == "transaction") assert transaction["contexts"]["trace"]["status"] == "internal_error" @@ -1100,7 +1152,7 @@ def test_langchain_callback_list_existing_callback(sentry_init): assert handler is sentry_callback -def test_langchain_message_role_mapping(sentry_init, capture_events): +def test_langchain_message_role_mapping(sentry_init, capture_items): """Test that message roles are properly normalized in langchain integration.""" global llm_type llm_type = "openai-chat" @@ -1110,7 +1162,7 @@ def test_langchain_message_role_mapping(sentry_init, capture_events): traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") prompt = ChatPromptTemplate.from_messages( [ @@ -1146,19 +1198,18 @@ def test_langchain_message_role_mapping(sentry_init, capture_events): with start_transaction(): list(agent_executor.stream({"input": test_input})) - assert len(events) > 0 - tx = events[0] - assert tx["type"] == "transaction" - + spans = [item.payload for item in items if item.type == "span"] # Find spans with gen_ai operation that should have message data gen_ai_spans = [ - span for span in tx.get("spans", []) if span.get("op", "").startswith("gen_ai") + span + for span in spans + 
if span["attributes"].get("sentry.op", "").startswith("gen_ai") ] # Check if any span has message data with normalized roles message_data_found = False for span in gen_ai_spans: - span_data = span.get("data", {}) + span_data = span.get("attributes", {}) if SPANDATA.GEN_AI_REQUEST_MESSAGES in span_data: message_data_found = True messages_data = span_data[SPANDATA.GEN_AI_REQUEST_MESSAGES] @@ -1239,7 +1290,7 @@ def test_langchain_message_role_normalization_units(): assert normalized[5] == "string message" # String message unchanged -def test_langchain_message_truncation(sentry_init, capture_events): +def test_langchain_message_truncation(sentry_init, capture_items): """Test that large messages are truncated properly in Langchain integration.""" from langchain_core.outputs import LLMResult, Generation @@ -1248,7 +1299,7 @@ def test_langchain_message_truncation(sentry_init, capture_events): traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") callback = SentryLangchainCallback(max_span_map_size=100, include_prompts=True) @@ -1291,23 +1342,23 @@ def test_langchain_message_truncation(sentry_init, capture_events): ) callback.on_llm_end(response=response, run_id=run_id) - assert len(events) > 0 - tx = events[0] + tx = next(item.payload for item in items if item.type == "transaction") assert tx["type"] == "transaction" + spans = [item.payload for item in items if item.type == "span"] llm_spans = [ span - for span in tx.get("spans", []) - if span.get("op") == "gen_ai.text_completion" + for span in spans + if span["attributes"].get("sentry.op") == "gen_ai.text_completion" ] assert len(llm_spans) > 0 llm_span = llm_spans[0] - assert llm_span["data"]["gen_ai.operation.name"] == "text_completion" - assert llm_span["data"][SPANDATA.GEN_AI_PIPELINE_NAME] == "my_pipeline" + assert llm_span["attributes"]["gen_ai.operation.name"] == "text_completion" + assert llm_span["attributes"][SPANDATA.GEN_AI_PIPELINE_NAME] == 
"my_pipeline" - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in llm_span["data"] - messages_data = llm_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in llm_span["attributes"] + messages_data = llm_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] assert isinstance(messages_data, str) parsed_messages = json.loads(messages_data) @@ -1327,7 +1378,7 @@ def test_langchain_message_truncation(sentry_init, capture_events): ], ) def test_langchain_embeddings_sync( - sentry_init, capture_events, send_default_pii, include_prompts + sentry_init, capture_items, send_default_pii, include_prompts ): """Test that sync embedding methods (embed_documents, embed_query) are properly traced.""" try: @@ -1340,7 +1391,7 @@ def test_langchain_embeddings_sync( traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("transaction", "span") # Mock the actual API call with mock.patch.object( @@ -1362,27 +1413,28 @@ def test_langchain_embeddings_sync( assert len(result) == 2 mock_embed_documents.assert_called_once() - # Check captured events - assert len(events) >= 1 - tx = events[0] - assert tx["type"] == "transaction" - + spans = [item.payload for item in items if item.type == "span"] # Find embeddings span embeddings_spans = [ - span for span in tx.get("spans", []) if span.get("op") == "gen_ai.embeddings" + span + for span in spans + if span["attributes"].get("sentry.op") == "gen_ai.embeddings" ] assert len(embeddings_spans) == 1 embeddings_span = embeddings_spans[0] - assert embeddings_span["description"] == "embeddings text-embedding-ada-002" - assert embeddings_span["origin"] == "auto.ai.langchain" - assert embeddings_span["data"]["gen_ai.operation.name"] == "embeddings" - assert embeddings_span["data"]["gen_ai.request.model"] == "text-embedding-ada-002" + assert embeddings_span["name"] == "embeddings text-embedding-ada-002" + assert embeddings_span["attributes"]["sentry.origin"] == 
"auto.ai.langchain" + assert embeddings_span["attributes"]["gen_ai.operation.name"] == "embeddings" + assert ( + embeddings_span["attributes"]["gen_ai.request.model"] + == "text-embedding-ada-002" + ) # Check if input is captured based on PII settings if send_default_pii and include_prompts: - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT in embeddings_span["data"] - input_data = embeddings_span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT in embeddings_span["attributes"] + input_data = embeddings_span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] # Could be serialized as string if isinstance(input_data, str): assert "Hello world" in input_data @@ -1391,7 +1443,9 @@ def test_langchain_embeddings_sync( assert "Hello world" in input_data assert "Test document" in input_data else: - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in embeddings_span.get("data", {}) + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in embeddings_span.get( + "attributes", {} + ) @pytest.mark.parametrize( @@ -1402,7 +1456,7 @@ def test_langchain_embeddings_sync( ], ) def test_langchain_embeddings_embed_query( - sentry_init, capture_events, send_default_pii, include_prompts + sentry_init, capture_items, send_default_pii, include_prompts ): """Test that embed_query method is properly traced.""" try: @@ -1415,7 +1469,7 @@ def test_langchain_embeddings_embed_query( traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("transaction", "span") # Mock the actual API call with mock.patch.object( @@ -1436,32 +1490,35 @@ def test_langchain_embeddings_embed_query( assert len(result) == 3 mock_embed_query.assert_called_once() - # Check captured events - assert len(events) >= 1 - tx = events[0] - assert tx["type"] == "transaction" - + spans = [item.payload for item in items if item.type == "span"] # Find embeddings span embeddings_spans = [ - span for span in tx.get("spans", []) if span.get("op") == 
"gen_ai.embeddings" + span + for span in spans + if span["attributes"].get("sentry.op") == "gen_ai.embeddings" ] assert len(embeddings_spans) == 1 embeddings_span = embeddings_spans[0] - assert embeddings_span["data"]["gen_ai.operation.name"] == "embeddings" - assert embeddings_span["data"]["gen_ai.request.model"] == "text-embedding-ada-002" + assert embeddings_span["attributes"]["gen_ai.operation.name"] == "embeddings" + assert ( + embeddings_span["attributes"]["gen_ai.request.model"] + == "text-embedding-ada-002" + ) # Check if input is captured based on PII settings if send_default_pii and include_prompts: - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT in embeddings_span["data"] - input_data = embeddings_span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT in embeddings_span["attributes"] + input_data = embeddings_span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] # Could be serialized as string if isinstance(input_data, str): assert "What is the capital of France?" in input_data else: assert "What is the capital of France?" 
in input_data else: - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in embeddings_span.get("data", {}) + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in embeddings_span.get( + "attributes", {} + ) @pytest.mark.parametrize( @@ -1473,7 +1530,7 @@ def test_langchain_embeddings_embed_query( ) @pytest.mark.asyncio async def test_langchain_embeddings_async( - sentry_init, capture_events, send_default_pii, include_prompts + sentry_init, capture_items, send_default_pii, include_prompts ): """Test that async embedding methods (aembed_documents, aembed_query) are properly traced.""" try: @@ -1486,7 +1543,7 @@ async def test_langchain_embeddings_async( traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("transaction", "span") async def mock_aembed_documents(self, texts): return [[0.1, 0.2, 0.3] for _ in texts] @@ -1512,38 +1569,41 @@ async def mock_aembed_documents(self, texts): assert len(result) == 2 mock_aembed.assert_called_once() - # Check captured events - assert len(events) >= 1 - tx = events[0] - assert tx["type"] == "transaction" - + spans = [item.payload for item in items if item.type == "span"] # Find embeddings span embeddings_spans = [ - span for span in tx.get("spans", []) if span.get("op") == "gen_ai.embeddings" + span + for span in spans + if span["attributes"].get("sentry.op") == "gen_ai.embeddings" ] assert len(embeddings_spans) == 1 embeddings_span = embeddings_spans[0] - assert embeddings_span["description"] == "embeddings text-embedding-ada-002" - assert embeddings_span["origin"] == "auto.ai.langchain" - assert embeddings_span["data"]["gen_ai.operation.name"] == "embeddings" - assert embeddings_span["data"]["gen_ai.request.model"] == "text-embedding-ada-002" + assert embeddings_span["name"] == "embeddings text-embedding-ada-002" + assert embeddings_span["attributes"]["sentry.origin"] == "auto.ai.langchain" + assert embeddings_span["attributes"]["gen_ai.operation.name"] == "embeddings" + assert ( + 
embeddings_span["attributes"]["gen_ai.request.model"] + == "text-embedding-ada-002" + ) # Check if input is captured based on PII settings if send_default_pii and include_prompts: - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT in embeddings_span["data"] - input_data = embeddings_span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT in embeddings_span["attributes"] + input_data = embeddings_span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] # Could be serialized as string if isinstance(input_data, str): assert "Async hello" in input_data or "Async test document" in input_data else: assert "Async hello" in input_data or "Async test document" in input_data else: - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in embeddings_span.get("data", {}) + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in embeddings_span.get( + "attributes", {} + ) @pytest.mark.asyncio -async def test_langchain_embeddings_aembed_query(sentry_init, capture_events): +async def test_langchain_embeddings_aembed_query(sentry_init, capture_items): """Test that aembed_query method is properly traced.""" try: from langchain_openai import OpenAIEmbeddings @@ -1555,7 +1615,7 @@ async def test_langchain_embeddings_aembed_query(sentry_init, capture_events): traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") async def mock_aembed_query(self, text): return [0.1, 0.2, 0.3] @@ -1579,24 +1639,25 @@ async def mock_aembed_query(self, text): assert len(result) == 3 mock_aembed.assert_called_once() - # Check captured events - assert len(events) >= 1 - tx = events[0] - assert tx["type"] == "transaction" - + spans = [item.payload for item in items if item.type == "span"] # Find embeddings span embeddings_spans = [ - span for span in tx.get("spans", []) if span.get("op") == "gen_ai.embeddings" + span + for span in spans + if span["attributes"].get("sentry.op") == "gen_ai.embeddings" ] assert len(embeddings_spans) == 1 
embeddings_span = embeddings_spans[0] - assert embeddings_span["data"]["gen_ai.operation.name"] == "embeddings" - assert embeddings_span["data"]["gen_ai.request.model"] == "text-embedding-ada-002" + assert embeddings_span["attributes"]["gen_ai.operation.name"] == "embeddings" + assert ( + embeddings_span["attributes"]["gen_ai.request.model"] + == "text-embedding-ada-002" + ) # Check if input is captured - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT in embeddings_span["data"] - input_data = embeddings_span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT in embeddings_span["attributes"] + input_data = embeddings_span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] # Could be serialized as string if isinstance(input_data, str): assert "Async query test" in input_data @@ -1604,7 +1665,7 @@ async def mock_aembed_query(self, text): assert "Async query test" in input_data -def test_langchain_embeddings_no_model_name(sentry_init, capture_events): +def test_langchain_embeddings_no_model_name(sentry_init, capture_items): """Test embeddings when model name is not available.""" try: from langchain_openai import OpenAIEmbeddings @@ -1615,7 +1676,7 @@ def test_langchain_embeddings_no_model_name(sentry_init, capture_events): integrations=[LangchainIntegration(include_prompts=False)], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction", "span") # Mock the actual API call and remove model attribute with mock.patch.object( @@ -1635,28 +1696,26 @@ def test_langchain_embeddings_no_model_name(sentry_init, capture_events): with start_transaction(name="test_embeddings_no_model"): embeddings.embed_documents(["Test"]) - # Check captured events - assert len(events) >= 1 - tx = events[0] - assert tx["type"] == "transaction" - + spans = [item.payload for item in items if item.type == "span"] # Find embeddings span embeddings_spans = [ - span for span in tx.get("spans", []) if span.get("op") == "gen_ai.embeddings" + span + 
for span in spans + if span["attributes"].get("sentry.op") == "gen_ai.embeddings" ] assert len(embeddings_spans) == 1 embeddings_span = embeddings_spans[0] - assert embeddings_span["description"] == "embeddings" - assert embeddings_span["data"]["gen_ai.operation.name"] == "embeddings" + assert embeddings_span["name"] == "embeddings" + assert embeddings_span["attributes"]["gen_ai.operation.name"] == "embeddings" # Model name should not be set if not available assert ( - "gen_ai.request.model" not in embeddings_span["data"] - or embeddings_span["data"]["gen_ai.request.model"] is None + "gen_ai.request.model" not in embeddings_span["attributes"] + or embeddings_span["attributes"]["gen_ai.request.model"] is None ) -def test_langchain_embeddings_integration_disabled(sentry_init, capture_events): +def test_langchain_embeddings_integration_disabled(sentry_init, capture_items): """Test that embeddings are not traced when integration is disabled.""" try: from langchain_openai import OpenAIEmbeddings @@ -1665,7 +1724,7 @@ def test_langchain_embeddings_integration_disabled(sentry_init, capture_events): # Initialize without LangchainIntegration sentry_init(traces_sample_rate=1.0) - events = capture_events() + items = capture_items("transaction", "span") with mock.patch.object( OpenAIEmbeddings, @@ -1680,18 +1739,17 @@ def test_langchain_embeddings_integration_disabled(sentry_init, capture_events): embeddings.embed_documents(["Test"]) # Check that no embeddings spans were created - if events: - tx = events[0] - embeddings_spans = [ - span - for span in tx.get("spans", []) - if span.get("op") == "gen_ai.embeddings" - ] - # Should be empty since integration is disabled - assert len(embeddings_spans) == 0 + spans = [item.payload for item in items if item.type == "span"] + embeddings_spans = [ + span + for span in spans + if span["attributes"].get("sentry.op") == "gen_ai.embeddings" + ] + # Should be empty since integration is disabled + assert len(embeddings_spans) == 0 -def 
test_langchain_embeddings_multiple_providers(sentry_init, capture_events): +def test_langchain_embeddings_multiple_providers(sentry_init, capture_items): """Test that embeddings work with different providers.""" try: from langchain_openai import OpenAIEmbeddings, AzureOpenAIEmbeddings @@ -1703,7 +1761,7 @@ def test_langchain_embeddings_multiple_providers(sentry_init, capture_events): traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") # Mock both providers with mock.patch.object( @@ -1731,26 +1789,24 @@ def test_langchain_embeddings_multiple_providers(sentry_init, capture_events): openai_embeddings.embed_documents(["OpenAI test"]) azure_embeddings.embed_documents(["Azure test"]) - # Check captured events - assert len(events) >= 1 - tx = events[0] - assert tx["type"] == "transaction" - + spans = [item.payload for item in items if item.type == "span"] # Find embeddings spans embeddings_spans = [ - span for span in tx.get("spans", []) if span.get("op") == "gen_ai.embeddings" + span + for span in spans + if span["attributes"].get("sentry.op") == "gen_ai.embeddings" ] # Should have 2 spans, one for each provider assert len(embeddings_spans) == 2 # Verify both spans have proper data for span in embeddings_spans: - assert span["data"]["gen_ai.operation.name"] == "embeddings" - assert span["data"]["gen_ai.request.model"] == "text-embedding-ada-002" - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT in span["data"] + assert span["attributes"]["gen_ai.operation.name"] == "embeddings" + assert span["attributes"]["gen_ai.request.model"] == "text-embedding-ada-002" + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT in span["attributes"] -def test_langchain_embeddings_error_handling(sentry_init, capture_events): +def test_langchain_embeddings_error_handling(sentry_init, capture_items): """Test that errors in embeddings are properly captured.""" try: from langchain_openai import OpenAIEmbeddings @@ -1762,7 +1818,7 @@ def 
test_langchain_embeddings_error_handling(sentry_init, capture_events): traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") # Mock the API call to raise an error with mock.patch.object( @@ -1781,15 +1837,16 @@ def test_langchain_embeddings_error_handling(sentry_init, capture_events): with pytest.raises(ValueError): embeddings.embed_documents(["Test"]) - # The error should be captured - assert len(events) >= 1 - # We should have both the transaction and potentially an error event - [e for e in events if e.get("level") == "error"] + [ + item.payload + for item in items + if item.type == "event" and item.payload.get("level") == "error" + ] # Note: errors might not be auto-captured depending on SDK settings, # but the span should still be created -def test_langchain_embeddings_multiple_calls(sentry_init, capture_events): +def test_langchain_embeddings_multiple_calls(sentry_init, capture_items): """Test that multiple embeddings calls within a transaction are all traced.""" try: from langchain_openai import OpenAIEmbeddings @@ -1801,7 +1858,7 @@ def test_langchain_embeddings_multiple_calls(sentry_init, capture_events): traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") # Mock the actual API calls with mock.patch.object( @@ -1828,32 +1885,31 @@ def test_langchain_embeddings_multiple_calls(sentry_init, capture_events): # Call embed_documents again embeddings.embed_documents(["Third batch"]) - # Check captured events - assert len(events) >= 1 - tx = events[0] - assert tx["type"] == "transaction" - + spans = [item.payload for item in items if item.type == "span"] # Find embeddings spans - should have 3 (2 embed_documents + 1 embed_query) embeddings_spans = [ - span for span in tx.get("spans", []) if span.get("op") == "gen_ai.embeddings" + span + for span in spans + if span["attributes"].get("sentry.op") == "gen_ai.embeddings" ] 
assert len(embeddings_spans) == 3 # Verify all spans have proper data for span in embeddings_spans: - assert span["data"]["gen_ai.operation.name"] == "embeddings" - assert span["data"]["gen_ai.request.model"] == "text-embedding-ada-002" - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT in span["data"] + assert span["attributes"]["gen_ai.operation.name"] == "embeddings" + assert span["attributes"]["gen_ai.request.model"] == "text-embedding-ada-002" + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT in span["attributes"] # Verify the input data is different for each span input_data_list = [ - span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] for span in embeddings_spans + span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + for span in embeddings_spans ] # They should all be different (different inputs) assert len(set(str(data) for data in input_data_list)) == 3 -def test_langchain_embeddings_span_hierarchy(sentry_init, capture_events): +def test_langchain_embeddings_span_hierarchy(sentry_init, capture_items): """Test that embeddings spans are properly nested within parent spans.""" try: from langchain_openai import OpenAIEmbeddings @@ -1865,7 +1921,7 @@ def test_langchain_embeddings_span_hierarchy(sentry_init, capture_events): traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") # Mock the actual API call with mock.patch.object( @@ -1884,15 +1940,15 @@ def test_langchain_embeddings_span_hierarchy(sentry_init, capture_events): with sentry_sdk.start_span(op="custom", name="custom operation"): embeddings.embed_documents(["Test within custom span"]) - # Check captured events - assert len(events) >= 1 - tx = events[0] - assert tx["type"] == "transaction" - + spans = [item.payload for item in items if item.type == "span"] # Find all spans embeddings_spans = [ - span for span in tx.get("spans", []) if span.get("op") == "gen_ai.embeddings" + span + for span in spans + if span["attributes"].get("sentry.op") == 
"gen_ai.embeddings" ] + + tx = next(item.payload for item in items if item.type == "transaction") custom_spans = [span for span in tx.get("spans", []) if span.get("op") == "custom"] assert len(embeddings_spans) == 1 @@ -1902,11 +1958,11 @@ def test_langchain_embeddings_span_hierarchy(sentry_init, capture_events): embeddings_span = embeddings_spans[0] custom_span = custom_spans[0] - assert embeddings_span["data"]["gen_ai.operation.name"] == "embeddings" + assert embeddings_span["attributes"]["gen_ai.operation.name"] == "embeddings" assert custom_span["description"] == "custom operation" -def test_langchain_embeddings_with_list_and_string_inputs(sentry_init, capture_events): +def test_langchain_embeddings_with_list_and_string_inputs(sentry_init, capture_items): """Test that embeddings correctly handle both list and string inputs.""" try: from langchain_openai import OpenAIEmbeddings @@ -1918,7 +1974,7 @@ def test_langchain_embeddings_with_list_and_string_inputs(sentry_init, capture_e traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") # Mock the actual API calls with mock.patch.object( @@ -1943,21 +1999,19 @@ def test_langchain_embeddings_with_list_and_string_inputs(sentry_init, capture_e # embed_query takes a string embeddings.embed_query("Single string query") - # Check captured events - assert len(events) >= 1 - tx = events[0] - assert tx["type"] == "transaction" - + spans = [item.payload for item in items if item.type == "span"] # Find embeddings spans embeddings_spans = [ - span for span in tx.get("spans", []) if span.get("op") == "gen_ai.embeddings" + span + for span in spans + if span["attributes"].get("sentry.op") == "gen_ai.embeddings" ] assert len(embeddings_spans) == 2 # Both should have input data captured as lists for span in embeddings_spans: - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT in span["data"] - input_data = span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + assert 
SPANDATA.GEN_AI_EMBEDDINGS_INPUT in span["attributes"] + input_data = span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] # Input should be normalized to list format if isinstance(input_data, str): # If serialized, should contain the input text @@ -1975,7 +2029,7 @@ def test_langchain_embeddings_with_list_and_string_inputs(sentry_init, capture_e ) def test_langchain_response_model_extraction( sentry_init, - capture_events, + capture_items, response_metadata_model, expected_model, ): @@ -1984,7 +2038,7 @@ def test_langchain_response_model_extraction( traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") callback = SentryLangchainCallback(max_span_map_size=100, include_prompts=True) @@ -2009,25 +2063,22 @@ def test_langchain_response_model_extraction( response = Mock(generations=[[generation]]) callback.on_llm_end(response=response, run_id=run_id) - assert len(events) > 0 - tx = events[0] - assert tx["type"] == "transaction" - + spans = [item.payload for item in items if item.type == "span"] llm_spans = [ span - for span in tx.get("spans", []) - if span.get("op") == "gen_ai.text_completion" + for span in spans + if span["attributes"].get("sentry.op") == "gen_ai.text_completion" ] assert len(llm_spans) > 0 llm_span = llm_spans[0] - assert llm_span["data"]["gen_ai.operation.name"] == "text_completion" + assert llm_span["attributes"]["gen_ai.operation.name"] == "text_completion" if expected_model is not None: - assert SPANDATA.GEN_AI_RESPONSE_MODEL in llm_span["data"] - assert llm_span["data"][SPANDATA.GEN_AI_RESPONSE_MODEL] == expected_model + assert SPANDATA.GEN_AI_RESPONSE_MODEL in llm_span["attributes"] + assert llm_span["attributes"][SPANDATA.GEN_AI_RESPONSE_MODEL] == expected_model else: - assert SPANDATA.GEN_AI_RESPONSE_MODEL not in llm_span.get("data", {}) + assert SPANDATA.GEN_AI_RESPONSE_MODEL not in llm_span.get("attributes", {}) # Tests for multimodal content transformation functions @@ 
-2286,13 +2337,13 @@ def test_transform_google_file_data(self): ], ) def test_langchain_ai_system_detection( - sentry_init, capture_events, ai_type, expected_system + sentry_init, capture_items, ai_type, expected_system ): sentry_init( integrations=[LangchainIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction", "span") callback = SentryLangchainCallback(max_span_map_size=100, include_prompts=True) @@ -2312,23 +2363,20 @@ def test_langchain_ai_system_detection( response = Mock(generations=[[generation]]) callback.on_llm_end(response=response, run_id=run_id) - assert len(events) > 0 - tx = events[0] - assert tx["type"] == "transaction" - + spans = [item.payload for item in items if item.type == "span"] llm_spans = [ span - for span in tx.get("spans", []) - if span.get("op") == "gen_ai.text_completion" + for span in spans + if span["attributes"].get("sentry.op") == "gen_ai.text_completion" ] assert len(llm_spans) > 0 llm_span = llm_spans[0] if expected_system is not None: - assert llm_span["data"][SPANDATA.GEN_AI_SYSTEM] == expected_system + assert llm_span["attributes"][SPANDATA.GEN_AI_SYSTEM] == expected_system else: - assert SPANDATA.GEN_AI_SYSTEM not in llm_span.get("data", {}) + assert SPANDATA.GEN_AI_SYSTEM not in llm_span.get("attributes", {}) class TestTransformLangchainMessageContent: From b2542976f0f43bd1160f07f2a6783919d9861588 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 17 Apr 2026 10:35:14 +0200 Subject: [PATCH 16/84] test langgraph --- .../integrations/langgraph/test_langgraph.py | 386 ++++++++++-------- 1 file changed, 205 insertions(+), 181 deletions(-) diff --git a/tests/integrations/langgraph/test_langgraph.py b/tests/integrations/langgraph/test_langgraph.py index 2a385d8a78..e1a3baa0a8 100644 --- a/tests/integrations/langgraph/test_langgraph.py +++ b/tests/integrations/langgraph/test_langgraph.py @@ -147,7 +147,7 @@ def test_langgraph_integration_init(): ], ) def 
test_state_graph_compile( - sentry_init, capture_events, send_default_pii, include_prompts + sentry_init, capture_items, send_default_pii, include_prompts ): """Test StateGraph.compile() wrapper creates proper create_agent span.""" sentry_init( @@ -155,7 +155,7 @@ def test_state_graph_compile( traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("transaction", "span") graph = MockStateGraph() def original_compile(self, *args, **kwargs): @@ -171,21 +171,23 @@ def original_compile(self, *args, **kwargs): assert compiled_graph is not None assert compiled_graph.name == "test_graph" - tx = events[0] - assert tx["type"] == "transaction" - - agent_spans = [span for span in tx["spans"] if span["op"] == OP.GEN_AI_CREATE_AGENT] + spans = [item.payload for item in items if item.type == "span"] + agent_spans = [ + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_CREATE_AGENT + ] assert len(agent_spans) == 1 agent_span = agent_spans[0] - assert agent_span["description"] == "create_agent test_graph" - assert agent_span["origin"] == "auto.ai.langgraph" - assert agent_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "create_agent" - assert agent_span["data"][SPANDATA.GEN_AI_AGENT_NAME] == "test_graph" - assert agent_span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "test-model" - assert SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS in agent_span["data"] - - tools_data = agent_span["data"][SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS] + assert agent_span["name"] == "create_agent test_graph" + assert agent_span["attributes"]["sentry.origin"] == "auto.ai.langgraph" + assert agent_span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "create_agent" + assert agent_span["attributes"][SPANDATA.GEN_AI_AGENT_NAME] == "test_graph" + assert agent_span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "test-model" + assert SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS in agent_span["attributes"] + + tools_data = 
agent_span["attributes"][SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS] assert tools_data == ["search_tool", "calculator"] assert len(tools_data) == 2 assert "search_tool" in tools_data @@ -201,14 +203,14 @@ def original_compile(self, *args, **kwargs): (False, False), ], ) -def test_pregel_invoke(sentry_init, capture_events, send_default_pii, include_prompts): +def test_pregel_invoke(sentry_init, capture_items, send_default_pii, include_prompts): """Test Pregel.invoke() wrapper creates proper invoke_agent span.""" sentry_init( integrations=[LanggraphIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("transaction", "span") test_state = { "messages": [ @@ -245,26 +247,26 @@ def original_invoke(self, *args, **kwargs): assert result is not None - tx = events[0] - assert tx["type"] == "transaction" - + spans = [item.payload for item in items if item.type == "span"] invoke_spans = [ - span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT ] assert len(invoke_spans) == 1 invoke_span = invoke_spans[0] - assert invoke_span["description"] == "invoke_agent test_graph" - assert invoke_span["origin"] == "auto.ai.langgraph" - assert invoke_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "invoke_agent" - assert invoke_span["data"][SPANDATA.GEN_AI_PIPELINE_NAME] == "test_graph" - assert invoke_span["data"][SPANDATA.GEN_AI_AGENT_NAME] == "test_graph" + assert invoke_span["name"] == "invoke_agent test_graph" + assert invoke_span["attributes"]["sentry.origin"] == "auto.ai.langgraph" + assert invoke_span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "invoke_agent" + assert invoke_span["attributes"][SPANDATA.GEN_AI_PIPELINE_NAME] == "test_graph" + assert invoke_span["attributes"][SPANDATA.GEN_AI_AGENT_NAME] == "test_graph" if send_default_pii and include_prompts: - assert 
SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT in invoke_span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT in invoke_span["attributes"] - request_messages = invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + request_messages = invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] if isinstance(request_messages, str): import json @@ -273,11 +275,11 @@ def original_invoke(self, *args, **kwargs): assert len(request_messages) == 1 assert request_messages[0]["content"] == "Of course! How can I assist you?" - response_text = invoke_span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + response_text = invoke_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] assert response_text == expected_assistant_response - assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in invoke_span["data"] - tool_calls_data = invoke_span["data"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS] + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in invoke_span["attributes"] + tool_calls_data = invoke_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS] if isinstance(tool_calls_data, str): import json @@ -287,9 +289,11 @@ def original_invoke(self, *args, **kwargs): assert tool_calls_data[0]["id"] == "call_test_123" assert tool_calls_data[0]["function"]["name"] == "search_tool" else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in invoke_span.get("data", {}) - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in invoke_span.get("data", {}) - assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in invoke_span.get("data", {}) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in invoke_span.get("attributes", {}) + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in invoke_span.get("attributes", {}) + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in invoke_span.get( + "attributes", {} + ) @pytest.mark.parametrize( @@ -301,14 +305,14 @@ def original_invoke(self, *args, **kwargs): (False, False), ], ) -def 
test_pregel_ainvoke(sentry_init, capture_events, send_default_pii, include_prompts): +def test_pregel_ainvoke(sentry_init, capture_items, send_default_pii, include_prompts): """Test Pregel.ainvoke() async wrapper creates proper invoke_agent span.""" sentry_init( integrations=[LanggraphIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("transaction", "span") test_state = {"messages": [MockMessage("What's the weather like?", name="user")]} pregel = MockPregelInstance("async_graph") @@ -341,30 +345,30 @@ async def run_test(): result = asyncio.run(run_test()) assert result is not None - tx = events[0] - assert tx["type"] == "transaction" - + spans = [item.payload for item in items if item.type == "span"] invoke_spans = [ - span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT ] assert len(invoke_spans) == 1 invoke_span = invoke_spans[0] - assert invoke_span["description"] == "invoke_agent async_graph" - assert invoke_span["origin"] == "auto.ai.langgraph" - assert invoke_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "invoke_agent" - assert invoke_span["data"][SPANDATA.GEN_AI_PIPELINE_NAME] == "async_graph" - assert invoke_span["data"][SPANDATA.GEN_AI_AGENT_NAME] == "async_graph" + assert invoke_span["name"] == "invoke_agent async_graph" + assert invoke_span["attributes"]["sentry.origin"] == "auto.ai.langgraph" + assert invoke_span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "invoke_agent" + assert invoke_span["attributes"][SPANDATA.GEN_AI_PIPELINE_NAME] == "async_graph" + assert invoke_span["attributes"][SPANDATA.GEN_AI_AGENT_NAME] == "async_graph" if send_default_pii and include_prompts: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT in invoke_span["data"] + assert 
SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT in invoke_span["attributes"] - response_text = invoke_span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + response_text = invoke_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] assert response_text == expected_assistant_response - assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in invoke_span["data"] - tool_calls_data = invoke_span["data"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS] + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in invoke_span["attributes"] + tool_calls_data = invoke_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS] if isinstance(tool_calls_data, str): import json @@ -374,19 +378,21 @@ async def run_test(): assert tool_calls_data[0]["id"] == "call_weather_456" assert tool_calls_data[0]["function"]["name"] == "get_weather" else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in invoke_span.get("data", {}) - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in invoke_span.get("data", {}) - assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in invoke_span.get("data", {}) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in invoke_span.get("attributes", {}) + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in invoke_span.get("attributes", {}) + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in invoke_span.get( + "attributes", {} + ) -def test_pregel_invoke_error(sentry_init, capture_events): +def test_pregel_invoke_error(sentry_init, capture_items): """Test error handling during graph execution.""" sentry_init( integrations=[LanggraphIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") test_state = {"messages": [MockMessage("This will fail")]} pregel = MockPregelInstance("error_graph") @@ -397,25 +403,26 @@ def original_invoke(self, *args, **kwargs): wrapped_invoke = _wrap_pregel_invoke(original_invoke) wrapped_invoke(pregel, test_state) - tx = events[0] + spans = [item.payload for 
item in items if item.type == "span"] invoke_spans = [ - span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT ] assert len(invoke_spans) == 1 invoke_span = invoke_spans[0] - assert invoke_span.get("status") == "internal_error" - assert invoke_span.get("tags", {}).get("status") == "internal_error" + assert invoke_span.get("status") == "error" -def test_pregel_ainvoke_error(sentry_init, capture_events): +def test_pregel_ainvoke_error(sentry_init, capture_items): """Test error handling during async graph execution.""" sentry_init( integrations=[LanggraphIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") test_state = {"messages": [MockMessage("This will fail async")]} pregel = MockPregelInstance("async_error_graph") @@ -431,24 +438,25 @@ async def run_error_test(): asyncio.run(run_error_test()) - tx = events[0] + spans = [item.payload for item in items if item.type == "span"] invoke_spans = [ - span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT ] assert len(invoke_spans) == 1 invoke_span = invoke_spans[0] - assert invoke_span.get("status") == "internal_error" - assert invoke_span.get("tags", {}).get("status") == "internal_error" + assert invoke_span.get("status") == "error" -def test_span_origin(sentry_init, capture_events): +def test_span_origin(sentry_init, capture_items): """Test that span origins are correctly set.""" sentry_init( integrations=[LanggraphIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction", "span") graph = MockStateGraph() @@ -461,16 +469,17 @@ def original_compile(self, *args, **kwargs): wrapped_compile = _wrap_state_graph_compile(original_compile) wrapped_compile(graph) - tx = 
events[0] + tx = next(item.payload for item in items if item.type == "transaction") assert tx["contexts"]["trace"]["origin"] == "manual" - for span in tx["spans"]: - assert span["origin"] == "auto.ai.langgraph" + spans = [item.payload for item in items if item.type == "span"] + for span in spans: + assert span["attributes"]["sentry.origin"] == "auto.ai.langgraph" @pytest.mark.parametrize("graph_name", ["my_graph", None, ""]) def test_pregel_invoke_with_different_graph_names( - sentry_init, capture_events, graph_name + sentry_init, capture_items, graph_name ): """Test Pregel.invoke() with different graph name scenarios.""" sentry_init( @@ -478,7 +487,7 @@ def test_pregel_invoke_with_different_graph_names( traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") pregel = MockPregelInstance(graph_name) if graph_name else MockPregelInstance() if not graph_name: @@ -492,25 +501,27 @@ def original_invoke(self, *args, **kwargs): wrapped_invoke = _wrap_pregel_invoke(original_invoke) wrapped_invoke(pregel, {"messages": []}) - tx = events[0] + spans = [item.payload for item in items if item.type == "span"] invoke_spans = [ - span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT ] assert len(invoke_spans) == 1 invoke_span = invoke_spans[0] if graph_name and graph_name.strip(): - assert invoke_span["description"] == "invoke_agent my_graph" - assert invoke_span["data"][SPANDATA.GEN_AI_PIPELINE_NAME] == graph_name - assert invoke_span["data"][SPANDATA.GEN_AI_AGENT_NAME] == graph_name + assert invoke_span["name"] == "invoke_agent my_graph" + assert invoke_span["attributes"][SPANDATA.GEN_AI_PIPELINE_NAME] == graph_name + assert invoke_span["attributes"][SPANDATA.GEN_AI_AGENT_NAME] == graph_name else: - assert invoke_span["description"] == "invoke_agent" - assert SPANDATA.GEN_AI_PIPELINE_NAME not in 
invoke_span.get("data", {}) - assert SPANDATA.GEN_AI_AGENT_NAME not in invoke_span.get("data", {}) + assert invoke_span["name"] == "invoke_agent" + assert SPANDATA.GEN_AI_PIPELINE_NAME not in invoke_span.get("attributes", {}) + assert SPANDATA.GEN_AI_AGENT_NAME not in invoke_span.get("attributes", {}) -def test_pregel_invoke_span_includes_usage_data(sentry_init, capture_events): +def test_pregel_invoke_span_includes_usage_data(sentry_init, capture_items): """ Test that invoke_agent spans include aggregated usage data from context_wrapper. This verifies the new functionality added to track token usage in invoke_agent spans. @@ -519,7 +530,7 @@ def test_pregel_invoke_span_includes_usage_data(sentry_init, capture_events): integrations=[LanggraphIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction", "span") test_state = { "messages": [ @@ -564,29 +575,29 @@ def original_invoke(self, *args, **kwargs): assert result is not None - tx = events[0] - assert tx["type"] == "transaction" - + spans = [item.payload for item in items if item.type == "span"] invoke_spans = [ - span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT ] assert len(invoke_spans) == 1 invoke_agent_span = invoke_spans[0] # Verify invoke_agent span has usage data - assert invoke_agent_span["description"] == "invoke_agent test_graph" - assert "gen_ai.usage.input_tokens" in invoke_agent_span["data"] - assert "gen_ai.usage.output_tokens" in invoke_agent_span["data"] - assert "gen_ai.usage.total_tokens" in invoke_agent_span["data"] + assert invoke_agent_span["name"] == "invoke_agent test_graph" + assert "gen_ai.usage.input_tokens" in invoke_agent_span["attributes"] + assert "gen_ai.usage.output_tokens" in invoke_agent_span["attributes"] + assert "gen_ai.usage.total_tokens" in invoke_agent_span["attributes"] # The usage should match the mock_usage values 
(aggregated across all calls) - assert invoke_agent_span["data"]["gen_ai.usage.input_tokens"] == 10 - assert invoke_agent_span["data"]["gen_ai.usage.output_tokens"] == 20 - assert invoke_agent_span["data"]["gen_ai.usage.total_tokens"] == 30 + assert invoke_agent_span["attributes"]["gen_ai.usage.input_tokens"] == 10 + assert invoke_agent_span["attributes"]["gen_ai.usage.output_tokens"] == 20 + assert invoke_agent_span["attributes"]["gen_ai.usage.total_tokens"] == 30 -def test_pregel_ainvoke_span_includes_usage_data(sentry_init, capture_events): +def test_pregel_ainvoke_span_includes_usage_data(sentry_init, capture_items): """ Test that invoke_agent spans include aggregated usage data from context_wrapper. This verifies the new functionality added to track token usage in invoke_agent spans. @@ -595,7 +606,7 @@ def test_pregel_ainvoke_span_includes_usage_data(sentry_init, capture_events): integrations=[LanggraphIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction", "span") test_state = { "messages": [ @@ -643,29 +654,29 @@ async def run_test(): result = asyncio.run(run_test()) assert result is not None - tx = events[0] - assert tx["type"] == "transaction" - + spans = [item.payload for item in items if item.type == "span"] invoke_spans = [ - span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT ] assert len(invoke_spans) == 1 invoke_agent_span = invoke_spans[0] # Verify invoke_agent span has usage data - assert invoke_agent_span["description"] == "invoke_agent test_graph" - assert "gen_ai.usage.input_tokens" in invoke_agent_span["data"] - assert "gen_ai.usage.output_tokens" in invoke_agent_span["data"] - assert "gen_ai.usage.total_tokens" in invoke_agent_span["data"] + assert invoke_agent_span["name"] == "invoke_agent test_graph" + assert "gen_ai.usage.input_tokens" in invoke_agent_span["attributes"] + assert 
"gen_ai.usage.output_tokens" in invoke_agent_span["attributes"] + assert "gen_ai.usage.total_tokens" in invoke_agent_span["attributes"] # The usage should match the mock_usage values (aggregated across all calls) - assert invoke_agent_span["data"]["gen_ai.usage.input_tokens"] == 10 - assert invoke_agent_span["data"]["gen_ai.usage.output_tokens"] == 20 - assert invoke_agent_span["data"]["gen_ai.usage.total_tokens"] == 30 + assert invoke_agent_span["attributes"]["gen_ai.usage.input_tokens"] == 10 + assert invoke_agent_span["attributes"]["gen_ai.usage.output_tokens"] == 20 + assert invoke_agent_span["attributes"]["gen_ai.usage.total_tokens"] == 30 -def test_pregel_invoke_multiple_llm_calls_aggregate_usage(sentry_init, capture_events): +def test_pregel_invoke_multiple_llm_calls_aggregate_usage(sentry_init, capture_items): """ Test that invoke_agent spans show aggregated usage across multiple LLM calls (e.g., when tools are used and multiple API calls are made). @@ -674,7 +685,7 @@ def test_pregel_invoke_multiple_llm_calls_aggregate_usage(sentry_init, capture_e integrations=[LanggraphIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction", "span") test_state = { "messages": [ @@ -730,23 +741,23 @@ def original_invoke(self, *args, **kwargs): assert result is not None - tx = events[0] - assert tx["type"] == "transaction" - + spans = [item.payload for item in items if item.type == "span"] invoke_spans = [ - span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT ] assert len(invoke_spans) == 1 invoke_agent_span = invoke_spans[0] # Verify invoke_agent span has aggregated usage from both API calls # Total: 10 + 20 = 30 input tokens, 5 + 15 = 20 output tokens, 15 + 35 = 50 total - assert invoke_agent_span["data"]["gen_ai.usage.input_tokens"] == 30 - assert invoke_agent_span["data"]["gen_ai.usage.output_tokens"] == 20 - 
assert invoke_agent_span["data"]["gen_ai.usage.total_tokens"] == 50 + assert invoke_agent_span["attributes"]["gen_ai.usage.input_tokens"] == 30 + assert invoke_agent_span["attributes"]["gen_ai.usage.output_tokens"] == 20 + assert invoke_agent_span["attributes"]["gen_ai.usage.total_tokens"] == 50 -def test_pregel_ainvoke_multiple_llm_calls_aggregate_usage(sentry_init, capture_events): +def test_pregel_ainvoke_multiple_llm_calls_aggregate_usage(sentry_init, capture_items): """ Test that invoke_agent spans show aggregated usage across multiple LLM calls (e.g., when tools are used and multiple API calls are made). @@ -755,7 +766,7 @@ def test_pregel_ainvoke_multiple_llm_calls_aggregate_usage(sentry_init, capture_ integrations=[LanggraphIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction", "span") test_state = { "messages": [ @@ -814,23 +825,23 @@ async def run_test(): result = asyncio.run(run_test()) assert result is not None - tx = events[0] - assert tx["type"] == "transaction" - + spans = [item.payload for item in items if item.type == "span"] invoke_spans = [ - span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT ] assert len(invoke_spans) == 1 invoke_agent_span = invoke_spans[0] # Verify invoke_agent span has aggregated usage from both API calls # Total: 10 + 20 = 30 input tokens, 5 + 15 = 20 output tokens, 15 + 35 = 50 total - assert invoke_agent_span["data"]["gen_ai.usage.input_tokens"] == 30 - assert invoke_agent_span["data"]["gen_ai.usage.output_tokens"] == 20 - assert invoke_agent_span["data"]["gen_ai.usage.total_tokens"] == 50 + assert invoke_agent_span["attributes"]["gen_ai.usage.input_tokens"] == 30 + assert invoke_agent_span["attributes"]["gen_ai.usage.output_tokens"] == 20 + assert invoke_agent_span["attributes"]["gen_ai.usage.total_tokens"] == 50 -def 
test_pregel_invoke_span_includes_response_model(sentry_init, capture_events): +def test_pregel_invoke_span_includes_response_model(sentry_init, capture_items): """ Test that invoke_agent spans include the response model. When an agent makes multiple LLM calls, it should report the last model used. @@ -839,7 +850,7 @@ def test_pregel_invoke_span_includes_response_model(sentry_init, capture_events) integrations=[LanggraphIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction", "span") test_state = { "messages": [ @@ -884,23 +895,25 @@ def original_invoke(self, *args, **kwargs): assert result is not None - tx = events[0] - assert tx["type"] == "transaction" - + spans = [item.payload for item in items if item.type == "span"] invoke_spans = [ - span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT ] assert len(invoke_spans) == 1 invoke_agent_span = invoke_spans[0] # Verify invoke_agent span has response model - assert invoke_agent_span["description"] == "invoke_agent test_graph" - assert "gen_ai.response.model" in invoke_agent_span["data"] - assert invoke_agent_span["data"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" + assert invoke_agent_span["name"] == "invoke_agent test_graph" + assert "gen_ai.response.model" in invoke_agent_span["attributes"] + assert ( + invoke_agent_span["attributes"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" + ) -def test_pregel_ainvoke_span_includes_response_model(sentry_init, capture_events): +def test_pregel_ainvoke_span_includes_response_model(sentry_init, capture_items): """ Test that invoke_agent spans include the response model. When an agent makes multiple LLM calls, it should report the last model used. 
@@ -909,7 +922,7 @@ def test_pregel_ainvoke_span_includes_response_model(sentry_init, capture_events integrations=[LanggraphIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction", "span") test_state = { "messages": [ @@ -957,23 +970,25 @@ async def run_test(): result = asyncio.run(run_test()) assert result is not None - tx = events[0] - assert tx["type"] == "transaction" - + spans = [item.payload for item in items if item.type == "span"] invoke_spans = [ - span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT ] assert len(invoke_spans) == 1 invoke_agent_span = invoke_spans[0] # Verify invoke_agent span has response model - assert invoke_agent_span["description"] == "invoke_agent test_graph" - assert "gen_ai.response.model" in invoke_agent_span["data"] - assert invoke_agent_span["data"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" + assert invoke_agent_span["name"] == "invoke_agent test_graph" + assert "gen_ai.response.model" in invoke_agent_span["attributes"] + assert ( + invoke_agent_span["attributes"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" + ) -def test_pregel_invoke_span_uses_last_response_model(sentry_init, capture_events): +def test_pregel_invoke_span_uses_last_response_model(sentry_init, capture_items): """ Test that when an agent makes multiple LLM calls (e.g., with tools), the invoke_agent span reports the last response model used. 
@@ -982,7 +997,7 @@ def test_pregel_invoke_span_uses_last_response_model(sentry_init, capture_events integrations=[LanggraphIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction", "span") test_state = { "messages": [ @@ -1040,22 +1055,24 @@ def original_invoke(self, *args, **kwargs): assert result is not None - tx = events[0] - assert tx["type"] == "transaction" - + spans = [item.payload for item in items if item.type == "span"] invoke_spans = [ - span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT ] assert len(invoke_spans) == 1 invoke_agent_span = invoke_spans[0] # Verify invoke_agent span uses the LAST response model - assert "gen_ai.response.model" in invoke_agent_span["data"] - assert invoke_agent_span["data"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" + assert "gen_ai.response.model" in invoke_agent_span["attributes"] + assert ( + invoke_agent_span["attributes"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" + ) -def test_pregel_ainvoke_span_uses_last_response_model(sentry_init, capture_events): +def test_pregel_ainvoke_span_uses_last_response_model(sentry_init, capture_items): """ Test that when an agent makes multiple LLM calls (e.g., with tools), the invoke_agent span reports the last response model used. 
@@ -1064,7 +1081,7 @@ def test_pregel_ainvoke_span_uses_last_response_model(sentry_init, capture_event integrations=[LanggraphIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction", "span") test_state = { "messages": [ @@ -1125,19 +1142,21 @@ async def run_test(): result = asyncio.run(run_test()) assert result is not None - tx = events[0] - assert tx["type"] == "transaction" - + spans = [item.payload for item in items if item.type == "span"] invoke_spans = [ - span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT ] assert len(invoke_spans) == 1 invoke_agent_span = invoke_spans[0] # Verify invoke_agent span uses the LAST response model - assert "gen_ai.response.model" in invoke_agent_span["data"] - assert invoke_agent_span["data"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" + assert "gen_ai.response.model" in invoke_agent_span["attributes"] + assert ( + invoke_agent_span["attributes"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" + ) def test_complex_message_parsing(): @@ -1187,14 +1206,14 @@ def test_complex_message_parsing(): assert result[2]["function_call"]["name"] == "search" -def test_extraction_functions_complex_scenario(sentry_init, capture_events): +def test_extraction_functions_complex_scenario(sentry_init, capture_items): """Test extraction functions with complex scenarios including multiple messages and edge cases.""" sentry_init( integrations=[LanggraphIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") pregel = MockPregelInstance("complex_graph") test_state = {"messages": [MockMessage("Complex request", name="user")]} @@ -1235,21 +1254,23 @@ def original_invoke(self, *args, **kwargs): assert result is not None - tx = events[0] + spans = [item.payload for item in items if item.type == 
"span"] invoke_spans = [ - span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT ] assert len(invoke_spans) == 1 invoke_span = invoke_spans[0] - assert SPANDATA.GEN_AI_RESPONSE_TEXT in invoke_span["data"] - response_text = invoke_span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + assert SPANDATA.GEN_AI_RESPONSE_TEXT in invoke_span["attributes"] + response_text = invoke_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] assert response_text == "Final response" - assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in invoke_span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in invoke_span["attributes"] import json - tool_calls_data = invoke_span["data"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS] + tool_calls_data = invoke_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS] if isinstance(tool_calls_data, str): tool_calls_data = json.loads(tool_calls_data) @@ -1260,14 +1281,14 @@ def original_invoke(self, *args, **kwargs): assert tool_calls_data[1]["function"]["name"] == "calculate" -def test_langgraph_message_role_mapping(sentry_init, capture_events): +def test_langgraph_message_role_mapping(sentry_init, capture_items): """Test that Langgraph integration properly maps message roles like 'ai' to 'assistant'""" sentry_init( integrations=[LanggraphIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") # Mock a langgraph message with mixed roles class MockMessage: @@ -1297,17 +1318,18 @@ def __init__(self, content, message_type="human"): ) wrapped_invoke(pregel, state_data) - (event,) = events - span = event["spans"][0] + span = next(item.payload for item in items if item.type == "span") # Verify that the span was created correctly - assert span["op"] == "gen_ai.invoke_agent" + assert span["attributes"]["sentry.op"] == "gen_ai.invoke_agent" # If messages were captured, verify 
role mapping - if SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"]: + if SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"]: import json - stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + stored_messages = json.loads( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) # Find messages with specific content to verify role mapping ai_message = next( @@ -1331,7 +1353,7 @@ def __init__(self, content, message_type="human"): assert "ai" not in roles -def test_langgraph_message_truncation(sentry_init, capture_events): +def test_langgraph_message_truncation(sentry_init, capture_items): """Test that large messages are truncated properly in Langgraph integration.""" import json @@ -1340,7 +1362,7 @@ def test_langgraph_message_truncation(sentry_init, capture_events): traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") large_content = ( "This is a very long message that will exceed our size limits. 
" * 1000 @@ -1365,23 +1387,25 @@ def original_invoke(self, *args, **kwargs): result = wrapped_invoke(pregel, test_state) assert result is not None - assert len(events) > 0 - tx = events[0] - assert tx["type"] == "transaction" + spans = [item.payload for item in items if item.type == "span"] invoke_spans = [ - span for span in tx.get("spans", []) if span.get("op") == OP.GEN_AI_INVOKE_AGENT + span + for span in spans + if span["attributes"].get("sentry.op") == OP.GEN_AI_INVOKE_AGENT ] assert len(invoke_spans) > 0 invoke_span = invoke_spans[0] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["attributes"] - messages_data = invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + messages_data = invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] assert isinstance(messages_data, str) parsed_messages = json.loads(messages_data) assert isinstance(parsed_messages, list) assert len(parsed_messages) == 1 assert "small message 5" in str(parsed_messages[0]) + + (tx,) = (item.payload for item in items if item.type == "transaction") assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5 From 6f7a0547707a4ed22b8e99fce7c3c948d7ca74c1 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 17 Apr 2026 10:52:20 +0200 Subject: [PATCH 17/84] accept any as sdk version --- .../huggingface_hub/test_huggingface_hub.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/integrations/huggingface_hub/test_huggingface_hub.py b/tests/integrations/huggingface_hub/test_huggingface_hub.py index 6b4402bc52..98abbb00fa 100644 --- a/tests/integrations/huggingface_hub/test_huggingface_hub.py +++ b/tests/integrations/huggingface_hub/test_huggingface_hub.py @@ -596,7 +596,7 @@ def test_text_generation_streaming( "sentry.origin": "auto.ai.huggingface_hub", "sentry.release": mock.ANY, "sentry.sdk.name": "sentry.python", - "sentry.sdk.version": "2.58.0", 
+ "sentry.sdk.version": mock.ANY, "sentry.segment.id": mock.ANY, "sentry.segment.name": "test", "thread.id": mock.ANY, @@ -673,7 +673,7 @@ def test_chat_completion( "sentry.origin": "auto.ai.huggingface_hub", "sentry.release": mock.ANY, "sentry.sdk.name": "sentry.python", - "sentry.sdk.version": "2.58.0", + "sentry.sdk.version": mock.ANY, "sentry.segment.id": mock.ANY, "sentry.segment.name": "test", "thread.id": mock.ANY, @@ -750,7 +750,7 @@ def test_chat_completion_streaming( "sentry.origin": "auto.ai.huggingface_hub", "sentry.release": mock.ANY, "sentry.sdk.name": "sentry.python", - "sentry.sdk.version": "2.58.0", + "sentry.sdk.version": mock.ANY, "sentry.segment.id": mock.ANY, "sentry.segment.name": "test", "thread.id": mock.ANY, @@ -825,7 +825,7 @@ def test_chat_completion_api_error( "sentry.origin": "auto.ai.huggingface_hub", "sentry.release": mock.ANY, "sentry.sdk.name": "sentry.python", - "sentry.sdk.version": "2.58.0", + "sentry.sdk.version": mock.ANY, "sentry.segment.id": mock.ANY, "sentry.segment.name": "test", "thread.id": mock.ANY, @@ -939,7 +939,7 @@ def test_chat_completion_with_tools( "sentry.origin": "auto.ai.huggingface_hub", "sentry.release": mock.ANY, "sentry.sdk.name": "sentry.python", - "sentry.sdk.version": "2.58.0", + "sentry.sdk.version": mock.ANY, "sentry.segment.id": mock.ANY, "sentry.segment.name": "test", "thread.id": mock.ANY, @@ -1035,7 +1035,7 @@ def test_chat_completion_streaming_with_tools( "sentry.origin": "auto.ai.huggingface_hub", "sentry.release": mock.ANY, "sentry.sdk.name": "sentry.python", - "sentry.sdk.version": "2.58.0", + "sentry.sdk.version": mock.ANY, "sentry.segment.id": mock.ANY, "sentry.segment.name": "test", "thread.id": mock.ANY, From 4f871a422c8e6b69abe5160e3629b84550b46f26 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 17 Apr 2026 12:46:10 +0200 Subject: [PATCH 18/84] pydantic-ai tests --- .../pydantic_ai/test_pydantic_ai.py | 695 ++++++++++-------- 1 file changed, 369 insertions(+), 326 
deletions(-) diff --git a/tests/integrations/pydantic_ai/test_pydantic_ai.py b/tests/integrations/pydantic_ai/test_pydantic_ai.py index 50ce155f5b..fe34dd0f5d 100644 --- a/tests/integrations/pydantic_ai/test_pydantic_ai.py +++ b/tests/integrations/pydantic_ai/test_pydantic_ai.py @@ -53,7 +53,7 @@ def inner(): @pytest.mark.asyncio -async def test_agent_run_async(sentry_init, capture_events, get_test_agent): +async def test_agent_run_async(sentry_init, capture_items, get_test_agent): """ Test that the integration creates spans for async agent runs. """ @@ -63,7 +63,7 @@ async def test_agent_run_async(sentry_init, capture_events, get_test_agent): send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") test_agent = get_test_agent() result = await test_agent.run("Test input") @@ -71,8 +71,7 @@ async def test_agent_run_async(sentry_init, capture_events, get_test_agent): assert result is not None assert result.output is not None - (transaction,) = events - spans = transaction["spans"] + (transaction,) = (item.payload for item in items if item.type == "transaction") # Verify transaction (the transaction IS the invoke_agent span) assert transaction["transaction"] == "invoke_agent test_agent" @@ -81,28 +80,31 @@ async def test_agent_run_async(sentry_init, capture_events, get_test_agent): # The transaction itself should have invoke_agent data assert transaction["contexts"]["trace"]["op"] == "gen_ai.invoke_agent" + spans = [item.payload for item in items if item.type == "span"] # Find child span types (invoke_agent is the transaction, not a child span) - chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] assert len(chat_spans) >= 1 # Check chat span chat_span = chat_spans[0] - assert "chat" in chat_span["description"] - assert chat_span["data"]["gen_ai.operation.name"] == "chat" - assert 
chat_span["data"]["gen_ai.response.streaming"] is False - assert "gen_ai.request.messages" in chat_span["data"] - assert "gen_ai.usage.input_tokens" in chat_span["data"] - assert "gen_ai.usage.output_tokens" in chat_span["data"] + assert "chat" in chat_span["name"] + assert chat_span["attributes"]["gen_ai.operation.name"] == "chat" + assert chat_span["attributes"]["gen_ai.response.streaming"] is False + assert "gen_ai.request.messages" in chat_span["attributes"] + assert "gen_ai.usage.input_tokens" in chat_span["attributes"] + assert "gen_ai.usage.output_tokens" in chat_span["attributes"] @pytest.mark.asyncio -async def test_agent_run_async_model_error(sentry_init, capture_events): +async def test_agent_run_async_model_error(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("event", "transaction", "span") def failing_model(messages, info): raise RuntimeError("model exploded") @@ -115,17 +117,17 @@ def failing_model(messages, info): with pytest.raises(RuntimeError, match="model exploded"): await agent.run("Test input") - (error, transaction) = events + (error,) = (item.payload for item in items if item.type == "event") assert error["level"] == "error" - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] assert len(spans) == 1 - assert spans[0]["status"] == "internal_error" + assert spans[0]["status"] == "error" @pytest.mark.asyncio -async def test_agent_run_async_usage_data(sentry_init, capture_events, get_test_agent): +async def test_agent_run_async_usage_data(sentry_init, capture_items, get_test_agent): """ Test that the invoke_agent span includes token usage and model data. 
""" @@ -135,7 +137,7 @@ async def test_agent_run_async_usage_data(sentry_init, capture_events, get_test_ send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") test_agent = get_test_agent() result = await test_agent.run("Test input") @@ -143,8 +145,7 @@ async def test_agent_run_async_usage_data(sentry_init, capture_events, get_test_ assert result is not None assert result.output is not None - (transaction,) = events - + (transaction,) = (item.payload for item in items if item.type == "transaction") # Verify transaction (the transaction IS the invoke_agent span) assert transaction["transaction"] == "invoke_agent test_agent" @@ -170,7 +171,7 @@ async def test_agent_run_async_usage_data(sentry_init, capture_events, get_test_ assert trace_data["gen_ai.response.model"] == "test" # Test model name -def test_agent_run_sync(sentry_init, capture_events, get_test_agent): +def test_agent_run_sync(sentry_init, capture_items, get_test_agent): """ Test that the integration creates spans for sync agent runs. 
""" @@ -180,7 +181,7 @@ def test_agent_run_sync(sentry_init, capture_events, get_test_agent): send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") test_agent = get_test_agent() result = test_agent.run_sync("Test input") @@ -188,29 +189,31 @@ def test_agent_run_sync(sentry_init, capture_events, get_test_agent): assert result is not None assert result.output is not None - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] # Verify transaction + (transaction,) = (item.payload for item in items if item.type == "transaction") assert transaction["transaction"] == "invoke_agent test_agent" assert transaction["contexts"]["trace"]["origin"] == "auto.ai.pydantic_ai" # Find span types - chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] assert len(chat_spans) >= 1 # Verify streaming flag is False for sync for chat_span in chat_spans: - assert chat_span["data"]["gen_ai.response.streaming"] is False + assert chat_span["attributes"]["gen_ai.response.streaming"] is False -def test_agent_run_sync_model_error(sentry_init, capture_events): +def test_agent_run_sync_model_error(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("event", "transaction", "span") def failing_model(messages, info): raise RuntimeError("model exploded") @@ -223,17 +226,17 @@ def failing_model(messages, info): with pytest.raises(RuntimeError, match="model exploded"): agent.run_sync("Test input") - (error, transaction) = events + (error,) = (item.payload for item in items if item.type == "event") assert error["level"] == "error" - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] assert len(spans) == 1 - assert spans[0]["status"] == 
"internal_error" + assert spans[0]["status"] == "error" @pytest.mark.asyncio -async def test_agent_run_stream(sentry_init, capture_events, get_test_agent): +async def test_agent_run_stream(sentry_init, capture_items, get_test_agent): """ Test that the integration creates spans for streaming agent runs. """ @@ -243,7 +246,7 @@ async def test_agent_run_stream(sentry_init, capture_events, get_test_agent): send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") test_agent = get_test_agent() async with test_agent.run_stream("Test input") as result: @@ -251,31 +254,33 @@ async def test_agent_run_stream(sentry_init, capture_events, get_test_agent): async for _ in result.stream_output(): pass - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] # Verify transaction + (transaction,) = (item.payload for item in items if item.type == "transaction") assert transaction["transaction"] == "invoke_agent test_agent" assert transaction["contexts"]["trace"]["origin"] == "auto.ai.pydantic_ai" # Find chat spans - chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] assert len(chat_spans) >= 1 # Verify streaming flag is True for streaming for chat_span in chat_spans: - assert chat_span["data"]["gen_ai.response.streaming"] is True - assert "gen_ai.request.messages" in chat_span["data"] - assert "gen_ai.usage.input_tokens" in chat_span["data"] + assert chat_span["attributes"]["gen_ai.response.streaming"] is True + assert "gen_ai.request.messages" in chat_span["attributes"] + assert "gen_ai.usage.input_tokens" in chat_span["attributes"] # Streaming responses should still have output data assert ( - "gen_ai.response.text" in chat_span["data"] - or "gen_ai.response.model" in chat_span["data"] + "gen_ai.response.text" in chat_span["attributes"] + or "gen_ai.response.model" in 
chat_span["attributes"] ) @pytest.mark.asyncio -async def test_agent_run_stream_events(sentry_init, capture_events, get_test_agent): +async def test_agent_run_stream_events(sentry_init, capture_items, get_test_agent): """ Test that run_stream_events creates spans (it uses run internally, so non-streaming). """ @@ -285,30 +290,31 @@ async def test_agent_run_stream_events(sentry_init, capture_events, get_test_age send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") # Consume all events test_agent = get_test_agent() async for _ in test_agent.run_stream_events("Test input"): pass - (transaction,) = events - # Verify transaction + (transaction,) = (item.payload for item in items if item.type == "transaction") assert transaction["transaction"] == "invoke_agent test_agent" # Find chat spans - spans = transaction["spans"] - chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + spans = [item.payload for item in items if item.type == "span"] + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] assert len(chat_spans) >= 1 # run_stream_events uses run() internally, so streaming should be False for chat_span in chat_spans: - assert chat_span["data"]["gen_ai.response.streaming"] is False + assert chat_span["attributes"]["gen_ai.response.streaming"] is False @pytest.mark.asyncio -async def test_agent_with_tools(sentry_init, capture_events, get_test_agent): +async def test_agent_with_tools(sentry_init, capture_items, get_test_agent): """ Test that tool execution creates execute_tool spans. 
""" @@ -325,34 +331,39 @@ def add_numbers(a: int, b: int) -> int: """Add two numbers together.""" return a + b - events = capture_events() + items = capture_items("transaction", "span") result = await test_agent.run("What is 5 + 3?") assert result is not None - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] # Find child span types (invoke_agent is the transaction, not a child span) - chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] - tool_spans = [s for s in spans if s["op"] == "gen_ai.execute_tool"] + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] + tool_spans = [ + s + for s in spans + if s["attributes"].get("sentry.op", "") == "gen_ai.execute_tool" + ] # Should have tool spans assert len(tool_spans) >= 1 # Check tool span tool_span = tool_spans[0] - assert "execute_tool" in tool_span["description"] - assert tool_span["data"]["gen_ai.operation.name"] == "execute_tool" - assert tool_span["data"]["gen_ai.tool.name"] == "add_numbers" - assert "gen_ai.tool.input" in tool_span["data"] - assert "gen_ai.tool.output" in tool_span["data"] + assert "execute_tool" in tool_span["name"] + assert tool_span["attributes"]["gen_ai.operation.name"] == "execute_tool" + assert tool_span["attributes"]["gen_ai.tool.name"] == "add_numbers" + assert "gen_ai.tool.input" in tool_span["attributes"] + assert "gen_ai.tool.output" in tool_span["attributes"] # Check chat spans have available_tools for chat_span in chat_spans: - assert "gen_ai.request.available_tools" in chat_span["data"] - available_tools_str = chat_span["data"]["gen_ai.request.available_tools"] + assert "gen_ai.request.available_tools" in chat_span["attributes"] + available_tools_str = chat_span["attributes"]["gen_ai.request.available_tools"] # Available tools is serialized as a string assert "add_numbers" in available_tools_str @@ -363,7 +374,7 @@ def add_numbers(a: int, b: int) -> int: ) 
@pytest.mark.asyncio async def test_agent_with_tool_model_retry( - sentry_init, capture_events, get_test_agent, handled_tool_call_exceptions + sentry_init, capture_items, get_test_agent, handled_tool_call_exceptions ): """ Test that a handled exception is captured when a tool raises ModelRetry. @@ -391,47 +402,51 @@ def add_numbers(a: int, b: int) -> float: raise ModelRetry(message="Try again with the same arguments.") return a + b - events = capture_events() + items = capture_items("event", "transaction", "span") result = await test_agent.run("What is 5 + 3?") assert result is not None if handled_tool_call_exceptions: - (error, transaction) = events - else: - (transaction,) = events - spans = transaction["spans"] - - if handled_tool_call_exceptions: + (error,) = (item.payload for item in items if item.type == "event") assert error["level"] == "error" assert error["exception"]["values"][0]["mechanism"]["handled"] + spans = [item.payload for item in items if item.type == "span"] # Find child span types (invoke_agent is the transaction, not a child span) - chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] - tool_spans = [s for s in spans if s["op"] == "gen_ai.execute_tool"] + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] + tool_spans = [ + s + for s in spans + if s["attributes"].get("sentry.op", "") == "gen_ai.execute_tool" + ] # Should have tool spans assert len(tool_spans) >= 1 # Check tool spans model_retry_tool_span = tool_spans[0] - assert "execute_tool" in model_retry_tool_span["description"] - assert model_retry_tool_span["data"]["gen_ai.operation.name"] == "execute_tool" - assert model_retry_tool_span["data"]["gen_ai.tool.name"] == "add_numbers" - assert "gen_ai.tool.input" in model_retry_tool_span["data"] + assert "execute_tool" in model_retry_tool_span["name"] + assert ( + model_retry_tool_span["attributes"]["gen_ai.operation.name"] == "execute_tool" + ) + assert 
model_retry_tool_span["attributes"]["gen_ai.tool.name"] == "add_numbers" + assert "gen_ai.tool.input" in model_retry_tool_span["attributes"] tool_span = tool_spans[1] - assert "execute_tool" in tool_span["description"] - assert tool_span["data"]["gen_ai.operation.name"] == "execute_tool" - assert tool_span["data"]["gen_ai.tool.name"] == "add_numbers" - assert "gen_ai.tool.input" in tool_span["data"] - assert "gen_ai.tool.output" in tool_span["data"] + assert "execute_tool" in tool_span["name"] + assert tool_span["attributes"]["gen_ai.operation.name"] == "execute_tool" + assert tool_span["attributes"]["gen_ai.tool.name"] == "add_numbers" + assert "gen_ai.tool.input" in tool_span["attributes"] + assert "gen_ai.tool.output" in tool_span["attributes"] # Check chat spans have available_tools for chat_span in chat_spans: - assert "gen_ai.request.available_tools" in chat_span["data"] - available_tools_str = chat_span["data"]["gen_ai.request.available_tools"] + assert "gen_ai.request.available_tools" in chat_span["attributes"] + available_tools_str = chat_span["attributes"]["gen_ai.request.available_tools"] # Available tools is serialized as a string assert "add_numbers" in available_tools_str @@ -442,7 +457,7 @@ def add_numbers(a: int, b: int) -> float: ) @pytest.mark.asyncio async def test_agent_with_tool_validation_error( - sentry_init, capture_events, get_test_agent, handled_tool_call_exceptions + sentry_init, capture_items, get_test_agent, handled_tool_call_exceptions ): """ Test that a handled exception is captured when a tool has unsatisfiable constraints. 
@@ -464,7 +479,7 @@ def add_numbers(a: Annotated[int, Field(gt=0, lt=0)], b: int) -> int: """Add two numbers together.""" return a + b - events = capture_events() + items = capture_items("event", "transaction", "span") result = None with pytest.raises(UnexpectedModelBehavior): @@ -473,42 +488,45 @@ def add_numbers(a: Annotated[int, Field(gt=0, lt=0)], b: int) -> int: assert result is None if handled_tool_call_exceptions: - (error, model_behaviour_error, transaction) = events - else: ( + error, model_behaviour_error, - transaction, - ) = events - spans = transaction["spans"] - - if handled_tool_call_exceptions: + ) = (item.payload for item in items if item.type == "event") assert error["level"] == "error" assert error["exception"]["values"][0]["mechanism"]["handled"] - # Find child span types (invoke_agent is the transaction, not a child span) - chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] - tool_spans = [s for s in spans if s["op"] == "gen_ai.execute_tool"] + spans = [item.payload for item in items if item.type == "span"] + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] + tool_spans = [ + s + for s in spans + if s["attributes"].get("sentry.op", "") == "gen_ai.execute_tool" + ] # Should have tool spans assert len(tool_spans) >= 1 # Check tool spans model_retry_tool_span = tool_spans[0] - assert "execute_tool" in model_retry_tool_span["description"] - assert model_retry_tool_span["data"]["gen_ai.operation.name"] == "execute_tool" - assert model_retry_tool_span["data"]["gen_ai.tool.name"] == "add_numbers" - assert "gen_ai.tool.input" in model_retry_tool_span["data"] + assert "execute_tool" in model_retry_tool_span["name"] + assert ( + model_retry_tool_span["attributes"]["gen_ai.operation.name"] == "execute_tool" + ) + assert model_retry_tool_span["attributes"]["gen_ai.tool.name"] == "add_numbers" + assert "gen_ai.tool.input" in model_retry_tool_span["attributes"] # Check chat spans have available_tools for 
chat_span in chat_spans: - assert "gen_ai.request.available_tools" in chat_span["data"] - available_tools_str = chat_span["data"]["gen_ai.request.available_tools"] + assert "gen_ai.request.available_tools" in chat_span["attributes"] + available_tools_str = chat_span["attributes"]["gen_ai.request.available_tools"] # Available tools is serialized as a string assert "add_numbers" in available_tools_str @pytest.mark.asyncio -async def test_agent_with_tools_streaming(sentry_init, capture_events, get_test_agent): +async def test_agent_with_tools_streaming(sentry_init, capture_items, get_test_agent): """ Test that tool execution works correctly with streaming. """ @@ -525,37 +543,40 @@ def multiply(a: int, b: int) -> int: """Multiply two numbers.""" return a * b - events = capture_events() + items = capture_items("transaction", "span") async with test_agent.run_stream("What is 7 times 8?") as result: async for _ in result.stream_output(): pass - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] # Find span types - chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] - tool_spans = [s for s in spans if s["op"] == "gen_ai.execute_tool"] + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] + tool_spans = [ + s + for s in spans + if s["attributes"].get("sentry.op", "") == "gen_ai.execute_tool" + ] # Should have tool spans assert len(tool_spans) >= 1 # Verify streaming flag is True for chat_span in chat_spans: - assert chat_span["data"]["gen_ai.response.streaming"] is True + assert chat_span["attributes"]["gen_ai.response.streaming"] is True # Check tool span tool_span = tool_spans[0] - assert tool_span["data"]["gen_ai.tool.name"] == "multiply" - assert "gen_ai.tool.input" in tool_span["data"] - assert "gen_ai.tool.output" in tool_span["data"] + assert tool_span["attributes"]["gen_ai.tool.name"] == "multiply" + assert "gen_ai.tool.input" in 
tool_span["attributes"] + assert "gen_ai.tool.output" in tool_span["attributes"] @pytest.mark.asyncio -async def test_model_settings( - sentry_init, capture_events, get_test_agent_with_settings -): +async def test_model_settings(sentry_init, capture_items, get_test_agent_with_settings): """ Test that model settings are captured in spans. """ @@ -564,23 +585,24 @@ async def test_model_settings( traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction", "span") test_agent_with_settings = get_test_agent_with_settings() await test_agent_with_settings.run("Test input") - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] # Find chat span - chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] assert len(chat_spans) >= 1 chat_span = chat_spans[0] # Check that model settings are captured - assert chat_span["data"].get("gen_ai.request.temperature") == 0.7 - assert chat_span["data"].get("gen_ai.request.max_tokens") == 100 - assert chat_span["data"].get("gen_ai.request.top_p") == 0.9 + assert chat_span["attributes"].get("gen_ai.request.temperature") == 0.7 + assert chat_span["attributes"].get("gen_ai.request.max_tokens") == 100 + assert chat_span["attributes"].get("gen_ai.request.top_p") == 0.9 @pytest.mark.asyncio @@ -594,7 +616,7 @@ async def test_model_settings( ], ) async def test_system_prompt_attribute( - sentry_init, capture_events, send_default_pii, include_prompts + sentry_init, capture_items, send_default_pii, include_prompts ): """ Test that system prompts are included as the first message. 
@@ -611,21 +633,24 @@ async def test_system_prompt_attribute( send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("transaction", "span") await agent.run("Hello") - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] # The transaction IS the invoke_agent span, check for messages in chat spans instead - chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] assert len(chat_spans) >= 1 chat_span = chat_spans[0] if send_default_pii and include_prompts: - system_instructions = chat_span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + system_instructions = chat_span["attributes"][ + SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS + ] assert json.loads(system_instructions) == [ { "type": "text", @@ -633,11 +658,11 @@ async def test_system_prompt_attribute( } ] else: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_span["data"] + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_span["attributes"] @pytest.mark.asyncio -async def test_error_handling(sentry_init, capture_events): +async def test_error_handling(sentry_init, capture_items): """ Test error handling in agent execution. 
""" @@ -653,14 +678,13 @@ async def test_error_handling(sentry_init, capture_events): traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction", "span") # Simple run that should succeed await agent.run("Hello") # At minimum, we should have a transaction - assert len(events) >= 1 - transaction = [e for e in events if e.get("type") == "transaction"][0] + transaction = next(item.payload for item in items if item.type == "transaction") assert transaction["transaction"] == "invoke_agent test_error" # Transaction should complete successfully (status key may not exist if no error) trace_status = transaction["contexts"]["trace"].get("status") @@ -668,7 +692,7 @@ async def test_error_handling(sentry_init, capture_events): @pytest.mark.asyncio -async def test_without_pii(sentry_init, capture_events, get_test_agent): +async def test_without_pii(sentry_init, capture_items, get_test_agent): """ Test that PII is not captured when send_default_pii is False. """ @@ -678,25 +702,26 @@ async def test_without_pii(sentry_init, capture_events, get_test_agent): send_default_pii=False, ) - events = capture_events() + items = capture_items("transaction", "span") test_agent = get_test_agent() await test_agent.run("Sensitive input") - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] # Find child spans (invoke_agent is the transaction, not a child span) - chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] # Verify that messages and response text are not captured for span in chat_spans: - assert "gen_ai.request.messages" not in span["data"] - assert "gen_ai.response.text" not in span["data"] + assert "gen_ai.request.messages" not in span["attributes"] + assert "gen_ai.response.text" not in span["attributes"] @pytest.mark.asyncio -async def test_without_pii_tools(sentry_init, 
capture_events, get_test_agent): +async def test_without_pii_tools(sentry_init, capture_items, get_test_agent): """ Test that tool input/output are not captured when send_default_pii is False. """ @@ -713,24 +738,27 @@ def sensitive_tool(data: str) -> str: """A tool with sensitive data.""" return f"Processed: {data}" - events = capture_events() + items = capture_items("transaction", "span") await test_agent.run("Use sensitive tool with private data") - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] # Find tool spans - tool_spans = [s for s in spans if s["op"] == "gen_ai.execute_tool"] + tool_spans = [ + s + for s in spans + if s["attributes"].get("sentry.op", "") == "gen_ai.execute_tool" + ] # If tool was executed, verify input/output are not captured for tool_span in tool_spans: - assert "gen_ai.tool.input" not in tool_span["data"] - assert "gen_ai.tool.output" not in tool_span["data"] + assert "gen_ai.tool.input" not in tool_span["attributes"] + assert "gen_ai.tool.output" not in tool_span["attributes"] @pytest.mark.asyncio -async def test_multiple_agents_concurrent(sentry_init, capture_events, get_test_agent): +async def test_multiple_agents_concurrent(sentry_init, capture_items, get_test_agent): """ Test that multiple agents can run concurrently without interfering. 
""" @@ -739,7 +767,7 @@ async def test_multiple_agents_concurrent(sentry_init, capture_events, get_test_ traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction", "span") test_agent = get_test_agent() @@ -750,18 +778,15 @@ async def run_agent(input_text): results = await asyncio.gather(*[run_agent(f"Input {i}") for i in range(3)]) assert len(results) == 3 - assert len(events) == 3 # Verify each transaction is separate + events = [item.payload for item in items if item.type == "transaction"] for i, transaction in enumerate(events): - assert transaction["type"] == "transaction" assert transaction["transaction"] == "invoke_agent test_agent" - # Each should have its own spans - assert len(transaction["spans"]) >= 1 @pytest.mark.asyncio -async def test_message_history(sentry_init, capture_events): +async def test_message_history(sentry_init, capture_items): """ Test that full conversation history is captured in chat spans. """ @@ -776,7 +801,7 @@ async def test_message_history(sentry_init, capture_events): send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") # First message await agent.run("Hello, I'm Alice") @@ -797,23 +822,26 @@ async def test_message_history(sentry_init, capture_events): await agent.run("What is my name?", message_history=history) # We should have 2 transactions + events = [item.payload for item in items if item.type == "transaction"] assert len(events) >= 2 # Check the second transaction has the full history second_transaction = events[1] spans = second_transaction["spans"] - chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] if chat_spans: chat_span = chat_spans[0] - if "gen_ai.request.messages" in chat_span["data"]: - messages_data = chat_span["data"]["gen_ai.request.messages"] + if "gen_ai.request.messages" in chat_span["attributes"]: + messages_data = 
chat_span["attributes"]["gen_ai.request.messages"] # Should have multiple messages including history assert len(messages_data) > 1 @pytest.mark.asyncio -async def test_gen_ai_system(sentry_init, capture_events, get_test_agent): +async def test_gen_ai_system(sentry_init, capture_items, get_test_agent): """ Test that gen_ai.system is set from the model. """ @@ -822,26 +850,27 @@ async def test_gen_ai_system(sentry_init, capture_events, get_test_agent): traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction", "span") test_agent = get_test_agent() await test_agent.run("Test input") - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] # Find chat span - chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] assert len(chat_spans) >= 1 chat_span = chat_spans[0] # gen_ai.system should be set from the model (TestModel -> 'test') - assert "gen_ai.system" in chat_span["data"] - assert chat_span["data"]["gen_ai.system"] == "test" + assert "gen_ai.system" in chat_span["attributes"] + assert chat_span["attributes"]["gen_ai.system"] == "test" @pytest.mark.asyncio -async def test_include_prompts_false(sentry_init, capture_events, get_test_agent): +async def test_include_prompts_false(sentry_init, capture_items, get_test_agent): """ Test that prompts are not captured when include_prompts=False. 
""" @@ -851,25 +880,26 @@ async def test_include_prompts_false(sentry_init, capture_events, get_test_agent send_default_pii=True, # Even with PII enabled, prompts should not be captured ) - events = capture_events() + items = capture_items("transaction", "span") test_agent = get_test_agent() await test_agent.run("Sensitive prompt") - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] # Find child spans (invoke_agent is the transaction, not a child span) - chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] # Verify that messages and response text are not captured for span in chat_spans: - assert "gen_ai.request.messages" not in span["data"] - assert "gen_ai.response.text" not in span["data"] + assert "gen_ai.request.messages" not in span["attributes"] + assert "gen_ai.response.text" not in span["attributes"] @pytest.mark.asyncio -async def test_include_prompts_true(sentry_init, capture_events, get_test_agent): +async def test_include_prompts_true(sentry_init, capture_items, get_test_agent): """ Test that prompts are captured when include_prompts=True (default). 
""" @@ -879,26 +909,27 @@ async def test_include_prompts_true(sentry_init, capture_events, get_test_agent) send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") test_agent = get_test_agent() await test_agent.run("Test prompt") - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] # Find child spans (invoke_agent is the transaction, not a child span) - chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] # Verify that messages are captured in chat spans assert len(chat_spans) >= 1 for chat_span in chat_spans: - assert "gen_ai.request.messages" in chat_span["data"] + assert "gen_ai.request.messages" in chat_span["attributes"] @pytest.mark.asyncio async def test_include_prompts_false_with_tools( - sentry_init, capture_events, get_test_agent + sentry_init, capture_items, get_test_agent ): """ Test that tool input/output are not captured when include_prompts=False. 
@@ -916,26 +947,27 @@ def test_tool(value: int) -> int: """A test tool.""" return value * 2 - events = capture_events() + items = capture_items("transaction", "span") await test_agent.run("Use the test tool with value 5") - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] # Find tool spans - tool_spans = [s for s in spans if s["op"] == "gen_ai.execute_tool"] + tool_spans = [ + s + for s in spans + if s["attributes"].get("sentry.op", "") == "gen_ai.execute_tool" + ] # If tool was executed, verify input/output are not captured for tool_span in tool_spans: - assert "gen_ai.tool.input" not in tool_span["data"] - assert "gen_ai.tool.output" not in tool_span["data"] + assert "gen_ai.tool.input" not in tool_span["attributes"] + assert "gen_ai.tool.output" not in tool_span["attributes"] @pytest.mark.asyncio -async def test_include_prompts_requires_pii( - sentry_init, capture_events, get_test_agent -): +async def test_include_prompts_requires_pii(sentry_init, capture_items, get_test_agent): """ Test that include_prompts requires send_default_pii=True. 
""" @@ -945,25 +977,26 @@ async def test_include_prompts_requires_pii( send_default_pii=False, # PII disabled ) - events = capture_events() + items = capture_items("transaction", "span") test_agent = get_test_agent() await test_agent.run("Test prompt") - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] # Find child spans (invoke_agent is the transaction, not a child span) - chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] # Even with include_prompts=True, if PII is disabled, messages should not be captured for span in chat_spans: - assert "gen_ai.request.messages" not in span["data"] - assert "gen_ai.response.text" not in span["data"] + assert "gen_ai.request.messages" not in span["attributes"] + assert "gen_ai.response.text" not in span["attributes"] @pytest.mark.asyncio -async def test_mcp_tool_execution_spans(sentry_init, capture_events): +async def test_mcp_tool_execution_spans(sentry_init, capture_items): """ Test that MCP (Model Context Protocol) tool calls create execute_tool spans. 
@@ -1035,12 +1068,10 @@ async def mock_map_tool_result_part(part): send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") # Simulate MCP tool execution within a transaction through CombinedToolset - with sentry_sdk.start_transaction( - op="ai.run", name="invoke_agent test_mcp_agent" - ) as transaction: + with sentry_sdk.start_transaction(op="ai.run", name="invoke_agent test_mcp_agent"): # Set up the agent context scope = sentry_sdk.get_current_scope() scope._contexts["pydantic_ai_agent"] = { @@ -1080,13 +1111,10 @@ async def mock_map_tool_result_part(part): # MCP tool might raise if not fully mocked, that's okay pass - events_list = events + events_list = items if len(events_list) == 0: pytest.skip("No events captured, MCP test setup incomplete") - (transaction,) = events_list - transaction["spans"] - # Note: This test manually calls combined.call_tool which doesn't go through # ToolManager._call_tool (which is what the integration patches). # In real-world usage, MCP tools are called through agent.run() which uses ToolManager. @@ -1256,7 +1284,7 @@ async def run_and_check_context(agent, agent_name): @pytest.mark.asyncio -async def test_invoke_agent_with_list_user_prompt(sentry_init, capture_events): +async def test_invoke_agent_with_list_user_prompt(sentry_init, capture_items): """ Test that invoke_agent span handles list user prompts correctly. 
""" @@ -1271,15 +1299,14 @@ async def test_invoke_agent_with_list_user_prompt(sentry_init, capture_events): send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") # Use a list as user prompt await agent.run(["First part", "Second part"]) - (transaction,) = events - # Check that the invoke_agent transaction has messages data # The invoke_agent is the transaction itself + (transaction,) = [item.payload for item in items if item.type == "transaction"] if "gen_ai.request.messages" in transaction["contexts"]["trace"]["data"]: messages_str = transaction["contexts"]["trace"]["data"][ "gen_ai.request.messages" @@ -1299,7 +1326,7 @@ async def test_invoke_agent_with_list_user_prompt(sentry_init, capture_events): ], ) async def test_invoke_agent_with_instructions( - sentry_init, capture_events, send_default_pii, include_prompts + sentry_init, capture_items, send_default_pii, include_prompts ): """ Test that invoke_agent span handles instructions correctly. 
@@ -1322,31 +1349,34 @@ async def test_invoke_agent_with_instructions( send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("transaction", "span") await agent.run("Test input") - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] # The transaction IS the invoke_agent span, check for messages in chat spans instead - chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] assert len(chat_spans) >= 1 chat_span = chat_spans[0] if send_default_pii and include_prompts: - system_instructions = chat_span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + system_instructions = chat_span["attributes"][ + SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS + ] assert json.loads(system_instructions) == [ {"type": "text", "content": "System prompt"}, {"type": "text", "content": "Instruction 1\nInstruction 2"}, ] else: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_span["data"] + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_span["attributes"] @pytest.mark.asyncio -async def test_model_name_extraction_with_callable(sentry_init, capture_events): +async def test_model_name_extraction_with_callable(sentry_init, capture_items): """ Test model name extraction when model has a callable name() method. """ @@ -1372,7 +1402,7 @@ async def test_model_name_extraction_with_callable(sentry_init, capture_events): @pytest.mark.asyncio -async def test_model_name_extraction_fallback_to_str(sentry_init, capture_events): +async def test_model_name_extraction_fallback_to_str(sentry_init, capture_items): """ Test model name extraction falls back to str() when no name attribute exists. 
""" @@ -1399,7 +1429,7 @@ async def test_model_name_extraction_fallback_to_str(sentry_init, capture_events @pytest.mark.asyncio -async def test_model_settings_object_style(sentry_init, capture_events): +async def test_model_settings_object_style(sentry_init, capture_items): """ Test that object-style model settings (non-dict) are handled correctly. """ @@ -1433,7 +1463,7 @@ async def test_model_settings_object_style(sentry_init, capture_events): @pytest.mark.asyncio -async def test_usage_data_partial(sentry_init, capture_events): +async def test_usage_data_partial(sentry_init, capture_items): """ Test that usage data is correctly handled when only some fields are present. """ @@ -1447,14 +1477,15 @@ async def test_usage_data_partial(sentry_init, capture_events): traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction", "span") await agent.run("Test input") - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] - chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] assert len(chat_spans) >= 1 # Check that usage data fields exist (they may or may not be set depending on TestModel) @@ -1464,7 +1495,7 @@ async def test_usage_data_partial(sentry_init, capture_events): @pytest.mark.asyncio -async def test_agent_data_from_scope(sentry_init, capture_events): +async def test_agent_data_from_scope(sentry_init, capture_items): """ Test that agent data can be retrieved from Sentry scope when not passed directly. 
"""
@@ -1479,20 +1510,19 @@ async def test_agent_data_from_scope(
         traces_sample_rate=1.0,
     )
 
-    events = capture_events()
+    items = capture_items("transaction", "span")
 
     # The integration automatically sets agent in scope during execution
     await agent.run("Test input")
 
-    (transaction,) = events
-
-    # Verify agent name is captured
+    # Verify agent name is captured
+    (transaction,) = (item.payload for item in items if item.type == "transaction")
     assert transaction["transaction"] == "invoke_agent test_scope_agent"
 
 
 @pytest.mark.asyncio
 async def test_available_tools_without_description(
-    sentry_init, capture_events, get_test_agent
+    sentry_init, capture_items, get_test_agent
 ):
     """
     Test that available tools are captured even when description is missing.
@@ -1509,23 +1539,24 @@ def tool_without_desc(x: int) -> int:
         # No docstring = no description
         return x * 2
 
-    events = capture_events()
+    items = capture_items("transaction", "span")
 
     await test_agent.run("Use the tool with 5")
 
-    (transaction,) = events
-    spans = transaction["spans"]
+    spans = [item.payload for item in items if item.type == "span"]
 
-    chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"]
+    chat_spans = [
+        s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat"
+    ]
 
     if chat_spans:
         chat_span = chat_spans[0]
-        if "gen_ai.request.available_tools" in chat_span["data"]:
-            tools_str = chat_span["data"]["gen_ai.request.available_tools"]
+        if "gen_ai.request.available_tools" in chat_span["attributes"]:
+            tools_str = chat_span["attributes"]["gen_ai.request.available_tools"]
             assert "tool_without_desc" in tools_str
 
 
 @pytest.mark.asyncio
-async def test_output_with_tool_calls(sentry_init, capture_events, get_test_agent):
+async def test_output_with_tool_calls(sentry_init, capture_items, get_test_agent):
     """
     Test that tool calls in model response are captured correctly.
""" @@ -1542,14 +1573,15 @@ def calc_tool(value: int) -> int: """Calculate something.""" return value + 10 - events = capture_events() + items = capture_items("transaction", "span") await test_agent.run("Use calc_tool with 5") - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] - chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] # At least one chat span should exist assert len(chat_spans) >= 1 @@ -1558,11 +1590,11 @@ def calc_tool(value: int) -> int: for chat_span in chat_spans: # Tool calls may or may not be in response depending on TestModel behavior # Just verify the span was created and has basic data - assert "gen_ai.operation.name" in chat_span["data"] + assert "gen_ai.operation.name" in chat_span["attributes"] @pytest.mark.asyncio -async def test_message_formatting_with_different_parts(sentry_init, capture_events): +async def test_message_formatting_with_different_parts(sentry_init, capture_items): """ Test that different message part types are handled correctly in ai_client span. 
""" @@ -1579,7 +1611,7 @@ async def test_message_formatting_with_different_parts(sentry_init, capture_even send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") # Create message history with different part types history = [ @@ -1594,24 +1626,25 @@ async def test_message_formatting_with_different_parts(sentry_init, capture_even await agent.run("What did I say?", message_history=history) - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] - chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] # Should have chat spans assert len(chat_spans) >= 1 # Check that messages are captured chat_span = chat_spans[0] - if "gen_ai.request.messages" in chat_span["data"]: - messages_data = chat_span["data"]["gen_ai.request.messages"] + if "gen_ai.request.messages" in chat_span["attributes"]: + messages_data = chat_span["attributes"]["gen_ai.request.messages"] # Should contain message history assert messages_data is not None @pytest.mark.asyncio -async def test_update_invoke_agent_span_with_none_output(sentry_init, capture_events): +async def test_update_invoke_agent_span_with_none_output(sentry_init, capture_items): """ Test that update_invoke_agent_span handles None output gracefully. """ @@ -1639,7 +1672,7 @@ async def test_update_invoke_agent_span_with_none_output(sentry_init, capture_ev @pytest.mark.asyncio -async def test_update_ai_client_span_with_none_response(sentry_init, capture_events): +async def test_update_ai_client_span_with_none_response(sentry_init, capture_items): """ Test that update_ai_client_span handles None response gracefully. 
""" @@ -1666,7 +1699,7 @@ async def test_update_ai_client_span_with_none_response(sentry_init, capture_eve @pytest.mark.asyncio -async def test_agent_without_name(sentry_init, capture_events): +async def test_agent_without_name(sentry_init, capture_items): """ Test that agent without a name is handled correctly. """ @@ -1678,20 +1711,18 @@ async def test_agent_without_name(sentry_init, capture_events): traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction", "span") await agent.run("Test input") - (transaction,) = events - # Should still create transaction, just with default name - assert transaction["type"] == "transaction" + (transaction,) = (item.payload for item in items if item.type == "transaction") # Transaction name should be "invoke_agent agent" or similar default assert "invoke_agent" in transaction["transaction"] @pytest.mark.asyncio -async def test_model_response_without_parts(sentry_init, capture_events): +async def test_model_response_without_parts(sentry_init, capture_items): """ Test handling of model response without parts attribute. """ @@ -1723,7 +1754,7 @@ async def test_model_response_without_parts(sentry_init, capture_events): @pytest.mark.asyncio -async def test_input_messages_error_handling(sentry_init, capture_events): +async def test_input_messages_error_handling(sentry_init, capture_items): """ Test that _set_input_messages handles errors gracefully. """ @@ -1751,7 +1782,7 @@ async def test_input_messages_error_handling(sentry_init, capture_events): @pytest.mark.asyncio -async def test_available_tools_error_handling(sentry_init, capture_events): +async def test_available_tools_error_handling(sentry_init, capture_items): """ Test that _set_available_tools handles errors gracefully. 
""" @@ -1781,7 +1812,7 @@ async def test_available_tools_error_handling(sentry_init, capture_events): @pytest.mark.asyncio -async def test_set_usage_data_with_none_usage(sentry_init, capture_events): +async def test_set_usage_data_with_none_usage(sentry_init, capture_items): """ Test that _set_usage_data handles None usage gracefully. """ @@ -1806,7 +1837,7 @@ async def test_set_usage_data_with_none_usage(sentry_init, capture_events): @pytest.mark.asyncio -async def test_set_usage_data_with_partial_fields(sentry_init, capture_events): +async def test_set_usage_data_with_partial_fields(sentry_init, capture_items): """ Test that _set_usage_data handles usage with only some fields. """ @@ -1838,7 +1869,7 @@ async def test_set_usage_data_with_partial_fields(sentry_init, capture_events): @pytest.mark.asyncio -async def test_message_parts_with_tool_return(sentry_init, capture_events): +async def test_message_parts_with_tool_return(sentry_init, capture_items): """ Test that ToolReturnPart messages are handled correctly. """ @@ -1860,22 +1891,23 @@ def test_tool(x: int) -> int: send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") # Run with history containing tool return await agent.run("Use test_tool with 5") - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] - chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] # Should have chat spans assert len(chat_spans) >= 1 @pytest.mark.asyncio -async def test_message_parts_with_list_content(sentry_init, capture_events): +async def test_message_parts_with_list_content(sentry_init, capture_items): """ Test that message parts with list content are handled correctly. 
""" @@ -1910,7 +1942,7 @@ async def test_message_parts_with_list_content(sentry_init, capture_events): @pytest.mark.asyncio -async def test_output_data_with_text_and_tool_calls(sentry_init, capture_events): +async def test_output_data_with_text_and_tool_calls(sentry_init, capture_items): """ Test that _set_output_data handles both text and tool calls in response. """ @@ -1949,7 +1981,7 @@ async def test_output_data_with_text_and_tool_calls(sentry_init, capture_events) @pytest.mark.asyncio -async def test_output_data_error_handling(sentry_init, capture_events): +async def test_output_data_error_handling(sentry_init, capture_items): """ Test that _set_output_data handles errors in formatting gracefully. """ @@ -1981,7 +2013,7 @@ async def test_output_data_error_handling(sentry_init, capture_events): @pytest.mark.asyncio -async def test_message_with_system_prompt_part(sentry_init, capture_events): +async def test_message_with_system_prompt_part(sentry_init, capture_items): """ Test that SystemPromptPart is handled with correct role. """ @@ -2017,7 +2049,7 @@ async def test_message_with_system_prompt_part(sentry_init, capture_events): @pytest.mark.asyncio -async def test_message_with_instructions(sentry_init, capture_events): +async def test_message_with_instructions(sentry_init, capture_items): """ Test that messages with instructions field are handled correctly. """ @@ -2052,7 +2084,7 @@ async def test_message_with_instructions(sentry_init, capture_events): @pytest.mark.asyncio -async def test_set_input_messages_without_prompts(sentry_init, capture_events): +async def test_set_input_messages_without_prompts(sentry_init, capture_items): """ Test that _set_input_messages respects _should_send_prompts(). 
""" @@ -2078,7 +2110,7 @@ async def test_set_input_messages_without_prompts(sentry_init, capture_events): @pytest.mark.asyncio -async def test_set_output_data_without_prompts(sentry_init, capture_events): +async def test_set_output_data_without_prompts(sentry_init, capture_items): """ Test that _set_output_data respects _should_send_prompts(). """ @@ -2107,7 +2139,7 @@ async def test_set_output_data_without_prompts(sentry_init, capture_events): @pytest.mark.asyncio -async def test_get_model_name_with_exception_in_callable(sentry_init, capture_events): +async def test_get_model_name_with_exception_in_callable(sentry_init, capture_items): """ Test that _get_model_name handles exceptions in name() callable. """ @@ -2131,7 +2163,7 @@ async def test_get_model_name_with_exception_in_callable(sentry_init, capture_ev @pytest.mark.asyncio -async def test_get_model_name_with_string_model(sentry_init, capture_events): +async def test_get_model_name_with_string_model(sentry_init, capture_items): """ Test that _get_model_name handles string models. """ @@ -2150,7 +2182,7 @@ async def test_get_model_name_with_string_model(sentry_init, capture_events): @pytest.mark.asyncio -async def test_get_model_name_with_none(sentry_init, capture_events): +async def test_get_model_name_with_none(sentry_init, capture_items): """ Test that _get_model_name handles None model. """ @@ -2169,7 +2201,7 @@ async def test_get_model_name_with_none(sentry_init, capture_events): @pytest.mark.asyncio -async def test_set_model_data_with_system(sentry_init, capture_events): +async def test_set_model_data_with_system(sentry_init, capture_items): """ Test that _set_model_data captures system from model. 
""" @@ -2200,7 +2232,7 @@ async def test_set_model_data_with_system(sentry_init, capture_events): @pytest.mark.asyncio -async def test_set_model_data_from_agent_scope(sentry_init, capture_events): +async def test_set_model_data_from_agent_scope(sentry_init, capture_items): """ Test that _set_model_data retrieves model from agent in scope when not passed. """ @@ -2234,7 +2266,7 @@ async def test_set_model_data_from_agent_scope(sentry_init, capture_events): @pytest.mark.asyncio -async def test_set_model_data_with_none_settings_values(sentry_init, capture_events): +async def test_set_model_data_with_none_settings_values(sentry_init, capture_items): """ Test that _set_model_data skips None values in settings. """ @@ -2266,7 +2298,7 @@ async def test_set_model_data_with_none_settings_values(sentry_init, capture_eve @pytest.mark.asyncio -async def test_should_send_prompts_without_pii(sentry_init, capture_events): +async def test_should_send_prompts_without_pii(sentry_init, capture_items): """ Test that _should_send_prompts returns False when PII disabled. """ @@ -2284,7 +2316,7 @@ async def test_should_send_prompts_without_pii(sentry_init, capture_events): @pytest.mark.asyncio -async def test_set_agent_data_without_agent(sentry_init, capture_events): +async def test_set_agent_data_without_agent(sentry_init, capture_items): """ Test that _set_agent_data handles None agent gracefully. """ @@ -2309,7 +2341,7 @@ async def test_set_agent_data_without_agent(sentry_init, capture_events): @pytest.mark.asyncio -async def test_set_agent_data_from_scope(sentry_init, capture_events): +async def test_set_agent_data_from_scope(sentry_init, capture_items): """ Test that _set_agent_data retrieves agent from scope when not passed. 
""" @@ -2341,7 +2373,7 @@ async def test_set_agent_data_from_scope(sentry_init, capture_events): @pytest.mark.asyncio -async def test_set_agent_data_without_name(sentry_init, capture_events): +async def test_set_agent_data_without_name(sentry_init, capture_items): """ Test that _set_agent_data handles agent without name attribute. """ @@ -2371,7 +2403,7 @@ async def test_set_agent_data_without_name(sentry_init, capture_events): @pytest.mark.asyncio -async def test_set_available_tools_without_toolset(sentry_init, capture_events): +async def test_set_available_tools_without_toolset(sentry_init, capture_items): """ Test that _set_available_tools handles agent without toolset. """ @@ -2401,7 +2433,7 @@ async def test_set_available_tools_without_toolset(sentry_init, capture_events): @pytest.mark.asyncio -async def test_set_available_tools_with_schema(sentry_init, capture_events): +async def test_set_available_tools_with_schema(sentry_init, capture_items): """ Test that _set_available_tools extracts tool schema correctly. """ @@ -2437,7 +2469,7 @@ async def test_set_available_tools_with_schema(sentry_init, capture_events): @pytest.mark.asyncio -async def test_execute_tool_span_creation(sentry_init, capture_events): +async def test_execute_tool_span_creation(sentry_init, capture_items): """ Test direct creation of execute_tool span. """ @@ -2464,7 +2496,7 @@ async def test_execute_tool_span_creation(sentry_init, capture_events): @pytest.mark.asyncio -async def test_execute_tool_span_with_mcp_type(sentry_init, capture_events): +async def test_execute_tool_span_with_mcp_type(sentry_init, capture_items): """ Test execute_tool span with MCP tool type. 
""" @@ -2490,7 +2522,7 @@ async def test_execute_tool_span_with_mcp_type(sentry_init, capture_events): @pytest.mark.asyncio -async def test_execute_tool_span_without_prompts(sentry_init, capture_events): +async def test_execute_tool_span_without_prompts(sentry_init, capture_items): """ Test that execute_tool span respects _should_send_prompts(). """ @@ -2517,7 +2549,7 @@ async def test_execute_tool_span_without_prompts(sentry_init, capture_events): @pytest.mark.asyncio -async def test_execute_tool_span_with_none_args(sentry_init, capture_events): +async def test_execute_tool_span_with_none_args(sentry_init, capture_items): """ Test execute_tool span with None args. """ @@ -2540,7 +2572,7 @@ async def test_execute_tool_span_with_none_args(sentry_init, capture_events): @pytest.mark.asyncio -async def test_update_execute_tool_span_with_none_span(sentry_init, capture_events): +async def test_update_execute_tool_span_with_none_span(sentry_init, capture_items): """ Test that update_execute_tool_span handles None span gracefully. """ @@ -2561,7 +2593,7 @@ async def test_update_execute_tool_span_with_none_span(sentry_init, capture_even @pytest.mark.asyncio -async def test_update_execute_tool_span_with_none_result(sentry_init, capture_events): +async def test_update_execute_tool_span_with_none_result(sentry_init, capture_items): """ Test that update_execute_tool_span handles None result gracefully. """ @@ -2588,7 +2620,7 @@ async def test_update_execute_tool_span_with_none_result(sentry_init, capture_ev @pytest.mark.asyncio -async def test_tool_execution_without_span_context(sentry_init, capture_events): +async def test_tool_execution_without_span_context(sentry_init, capture_items): """ Test that tool execution patch handles case when no span context exists. This tests the code path where current_span is None in _patch_tool_execution. 
@@ -2617,7 +2649,7 @@ async def test_tool_execution_without_span_context(sentry_init, capture_events): @pytest.mark.asyncio -async def test_invoke_agent_span_with_callable_instruction(sentry_init, capture_events): +async def test_invoke_agent_span_with_callable_instruction(sentry_init, capture_items): """ Test that invoke_agent_span skips callable instructions correctly. """ @@ -2650,7 +2682,7 @@ async def test_invoke_agent_span_with_callable_instruction(sentry_init, capture_ @pytest.mark.asyncio -async def test_invoke_agent_span_with_string_instructions(sentry_init, capture_events): +async def test_invoke_agent_span_with_string_instructions(sentry_init, capture_items): """ Test that invoke_agent_span handles string instructions (not list). """ @@ -2680,7 +2712,7 @@ async def test_invoke_agent_span_with_string_instructions(sentry_init, capture_e @pytest.mark.asyncio -async def test_ai_client_span_with_streaming_flag(sentry_init, capture_events): +async def test_ai_client_span_with_streaming_flag(sentry_init, capture_items): """ Test that ai_client_span reads streaming flag from scope. """ @@ -2706,7 +2738,7 @@ async def test_ai_client_span_with_streaming_flag(sentry_init, capture_events): @pytest.mark.asyncio -async def test_ai_client_span_gets_agent_from_scope(sentry_init, capture_events): +async def test_ai_client_span_gets_agent_from_scope(sentry_init, capture_items): """ Test that ai_client_span gets agent from scope when not passed. 
""" @@ -2759,7 +2791,7 @@ def _find_binary_content(messages_data, expected_modality, expected_mime_type): @pytest.mark.asyncio -async def test_binary_content_encoding_image(sentry_init, capture_events): +async def test_binary_content_encoding_image(sentry_init, capture_items): """Test that BinaryContent with image data is properly encoded in messages.""" sentry_init( integrations=[PydanticAIIntegration()], @@ -2767,7 +2799,7 @@ async def test_binary_content_encoding_image(sentry_init, capture_events): send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") with sentry_sdk.start_transaction(op="test", name="test"): span = sentry_sdk.start_span(op="test_span") @@ -2782,14 +2814,14 @@ async def test_binary_content_encoding_image(sentry_init, capture_events): _set_input_messages(span, [mock_msg]) span.finish() - (event,) = events + (event,) = (item.payload for item in items if item.type == "transaction") span_data = event["spans"][0]["data"] messages_data = _get_messages_from_span(span_data) assert _find_binary_content(messages_data, "image", "image/png") @pytest.mark.asyncio -async def test_binary_content_encoding_mixed_content(sentry_init, capture_events): +async def test_binary_content_encoding_mixed_content(sentry_init, capture_items): """Test that BinaryContent mixed with text content is properly handled.""" sentry_init( integrations=[PydanticAIIntegration()], @@ -2797,7 +2829,7 @@ async def test_binary_content_encoding_mixed_content(sentry_init, capture_events send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") with sentry_sdk.start_transaction(op="test", name="test"): span = sentry_sdk.start_span(op="test_span") @@ -2814,7 +2846,7 @@ async def test_binary_content_encoding_mixed_content(sentry_init, capture_events _set_input_messages(span, [mock_msg]) span.finish() - (event,) = events + (event,) = (item.payload for item in items if item.type == "transaction") span_data = 
event["spans"][0]["data"] messages_data = _get_messages_from_span(span_data) @@ -2830,7 +2862,7 @@ async def test_binary_content_encoding_mixed_content(sentry_init, capture_events @pytest.mark.asyncio -async def test_binary_content_in_agent_run(sentry_init, capture_events): +async def test_binary_content_in_agent_run(sentry_init, capture_items): """Test that BinaryContent in actual agent run is properly captured in spans.""" agent = Agent("test", name="test_binary_agent") @@ -2840,28 +2872,30 @@ async def test_binary_content_in_agent_run(sentry_init, capture_events): send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") binary_content = BinaryContent( data=b"fake_image_data_for_testing", media_type="image/png" ) await agent.run(["Analyze this image:", binary_content]) - (transaction,) = events - chat_spans = [s for s in transaction["spans"] if s["op"] == "gen_ai.chat"] + spans = [item.payload for item in items if item.type == "span"] + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] assert len(chat_spans) >= 1 chat_span = chat_spans[0] - if "gen_ai.request.messages" in chat_span["data"]: - messages_str = str(chat_span["data"]["gen_ai.request.messages"]) + if "gen_ai.request.messages" in chat_span["attributes"]: + messages_str = str(chat_span["attributes"]["gen_ai.request.messages"]) assert any(keyword in messages_str for keyword in ["blob", "image", "base64"]) @pytest.mark.asyncio -async def test_set_usage_data_with_cache_tokens(sentry_init, capture_events): +async def test_set_usage_data_with_cache_tokens(sentry_init, capture_items): """Test that cache_read_tokens and cache_write_tokens are tracked.""" sentry_init(integrations=[PydanticAIIntegration()], traces_sample_rate=1.0) - events = capture_events() + items = capture_items("transaction", "span") with sentry_sdk.start_transaction(op="test", name="test"): span = sentry_sdk.start_span(op="test_span") @@ -2874,7 +2908,7 
@@ async def test_set_usage_data_with_cache_tokens(sentry_init, capture_events): _set_usage_data(span, usage) span.finish() - (event,) = events + (event,) = (item.payload for item in items if item.type == "transaction") (span_data,) = event["spans"] assert span_data["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 80 assert span_data["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 20 @@ -2922,7 +2956,7 @@ async def test_set_usage_data_with_cache_tokens(sentry_init, capture_events): ], ) def test_image_url_base64_content_in_span( - sentry_init, capture_events, url, image_url_kwargs, expected_content + sentry_init, capture_items, url, image_url_kwargs, expected_content ): from sentry_sdk.integrations.pydantic_ai.spans.ai_client import ai_client_span @@ -2932,7 +2966,7 @@ def test_image_url_base64_content_in_span( send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") with sentry_sdk.start_transaction(op="test", name="test"): image_url = ImageUrl(url=url, **image_url_kwargs) @@ -2944,10 +2978,12 @@ def test_image_url_base64_content_in_span( span = ai_client_span([mock_msg], None, None, None) span.finish() - (event,) = events - chat_spans = [s for s in event["spans"] if s["op"] == "gen_ai.chat"] + spans = [item.payload for item in items if item.type == "span"] + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] assert len(chat_spans) >= 1 - messages_data = _get_messages_from_span(chat_spans[0]["data"]) + messages_data = _get_messages_from_span(chat_spans[0]["attributes"]) found_image = False for msg in messages_data: @@ -2992,7 +3028,7 @@ def test_image_url_base64_content_in_span( ], ) async def test_invoke_agent_image_url( - sentry_init, capture_events, url, image_url_kwargs, expected_content + sentry_init, capture_items, url, image_url_kwargs, expected_content ): sentry_init( integrations=[PydanticAIIntegration()], @@ -3002,17 +3038,18 @@ async def 
test_invoke_agent_image_url( agent = Agent("test", name="test_image_url_agent") - events = capture_events() + items = capture_items("transaction", "span") image_url = ImageUrl(url=url, **image_url_kwargs) await agent.run([image_url, "Describe this image"]) - (transaction,) = events - found_image = False - chat_spans = [s for s in transaction["spans"] if s["op"] == "gen_ai.chat"] + spans = [item.payload for item in items if item.type == "span"] + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] for chat_span in chat_spans: - messages_data = _get_messages_from_span(chat_span["data"]) + messages_data = _get_messages_from_span(chat_span["attributes"]) for msg in messages_data: if "content" not in msg: continue @@ -3025,7 +3062,7 @@ async def test_invoke_agent_image_url( @pytest.mark.asyncio -async def test_tool_description_in_execute_tool_span(sentry_init, capture_events): +async def test_tool_description_in_execute_tool_span(sentry_init, capture_items): """ Test that tool description from the tool's docstring is included in execute_tool spans. 
""" @@ -3046,18 +3083,24 @@ def multiply_numbers(a: int, b: int) -> int: send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") result = await agent.run("What is 5 times 3?") assert result is not None - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] - tool_spans = [s for s in spans if s["op"] == "gen_ai.execute_tool"] + tool_spans = [ + s + for s in spans + if s["attributes"].get("sentry.op", "") == "gen_ai.execute_tool" + ] assert len(tool_spans) >= 1 tool_span = tool_spans[0] - assert tool_span["data"]["gen_ai.tool.name"] == "multiply_numbers" - assert SPANDATA.GEN_AI_TOOL_DESCRIPTION in tool_span["data"] - assert "Multiply two numbers" in tool_span["data"][SPANDATA.GEN_AI_TOOL_DESCRIPTION] + assert tool_span["attributes"]["gen_ai.tool.name"] == "multiply_numbers" + assert SPANDATA.GEN_AI_TOOL_DESCRIPTION in tool_span["attributes"] + assert ( + "Multiply two numbers" + in tool_span["attributes"][SPANDATA.GEN_AI_TOOL_DESCRIPTION] + ) From 7befc7d3863593c0414d437e59f7591ac4334cf5 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 17 Apr 2026 13:03:38 +0200 Subject: [PATCH 19/84] . 
--- sentry_sdk/client.py | 16 ++++++++-- tests/tracing/test_decorator.py | 53 ++++++++++++++++++++++++--------- tests/tracing/test_misc.py | 8 ++--- 3 files changed, 56 insertions(+), 21 deletions(-) diff --git a/sentry_sdk/client.py b/sentry_sdk/client.py index c6df2f564b..99e58ec499 100644 --- a/sentry_sdk/client.py +++ b/sentry_sdk/client.py @@ -245,9 +245,15 @@ def _serialized_v1_span_to_serialized_v2_span( res["attributes"] = {} for key, value in attributes.items(): - res["attributes"][key] = _serialized_v1_attribute_to_serialized_v2_attribute( - value - ) + converted_value = _serialized_v1_attribute_to_serialized_v2_attribute(value) + if converted_value is None: + continue + + res["attributes"][key] = converted_value + + # Remove redundant attribute, as status is stored in the status field. + if "status" in res["attributes"]: + del res["attributes"]["status"] return res @@ -268,6 +274,10 @@ def _split_gen_ai_spans( non_gen_ai_spans = [] gen_ai_spans = [] for span in spans: + if not isinstance(span, dict): + non_gen_ai_spans.append(span) + continue + span_op = span.get("op") if isinstance(span_op, str) and span_op.startswith("gen_ai."): gen_ai_spans.append(span) diff --git a/tests/tracing/test_decorator.py b/tests/tracing/test_decorator.py index 15432f5862..e73323138a 100644 --- a/tests/tracing/test_decorator.py +++ b/tests/tracing/test_decorator.py @@ -121,9 +121,9 @@ async def _some_function_traced(a, b, c): ) -def test_span_templates_ai_dicts(sentry_init, capture_events): +def test_span_templates_ai_dicts(sentry_init, capture_items): sentry_init(traces_sample_rate=1.0) - events = capture_events() + items = capture_items("span") @sentry_sdk.trace(template=SPANTEMPLATE.AI_TOOL) def my_tool(arg1, arg2): @@ -166,40 +166,57 @@ def my_agent(): with sentry_sdk.start_transaction(name="test-transaction"): my_agent() - (event,) = events - (agent_span, tool_span, chat_span) = event["spans"] + (agent_span, tool_span, chat_span) = ( + item.payload for item in items if 
item.type == "span" + ) - assert agent_span["op"] == "gen_ai.invoke_agent" + assert agent_span["attributes"]["sentry.op"] == "gen_ai.invoke_agent" assert ( - agent_span["description"] + agent_span["name"] == "invoke_agent test_decorator.test_span_templates_ai_dicts..my_agent" ) - assert agent_span["data"] == { + assert agent_span["attributes"] == { "gen_ai.agent.name": "test_decorator.test_span_templates_ai_dicts..my_agent", "gen_ai.operation.name": "invoke_agent", + "sentry.environment": "production", + "sentry.op": "gen_ai.invoke_agent", + "sentry.origin": "manual", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": mock.ANY, + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "test-transaction", "thread.id": mock.ANY, "thread.name": mock.ANY, } - assert tool_span["op"] == "gen_ai.execute_tool" + assert tool_span["attributes"]["sentry.op"] == "gen_ai.execute_tool" assert ( - tool_span["description"] + tool_span["name"] == "execute_tool test_decorator.test_span_templates_ai_dicts..my_tool" ) - assert tool_span["data"] == { + assert tool_span["attributes"] == { "gen_ai.tool.name": "test_decorator.test_span_templates_ai_dicts..my_tool", "gen_ai.operation.name": "execute_tool", "gen_ai.usage.input_tokens": 10, "gen_ai.usage.output_tokens": 20, "gen_ai.usage.total_tokens": 30, + "sentry.environment": "production", + "sentry.op": "gen_ai.execute_tool", + "sentry.origin": "manual", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": mock.ANY, + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "test-transaction", "thread.id": mock.ANY, "thread.name": mock.ANY, } - assert "gen_ai.tool.description" not in tool_span["data"] + assert "gen_ai.tool.description" not in tool_span["attributes"] - assert chat_span["op"] == "gen_ai.chat" - assert chat_span["description"] == "chat my-gpt-4o-mini" - assert chat_span["data"] == { + assert chat_span["attributes"]["sentry.op"] == "gen_ai.chat" 
+ assert chat_span["name"] == "chat my-gpt-4o-mini" + assert chat_span["attributes"] == { "gen_ai.operation.name": "chat", "gen_ai.request.frequency_penalty": 1.0, "gen_ai.request.max_tokens": 100, @@ -213,6 +230,14 @@ def my_agent(): "gen_ai.usage.input_tokens": 11, "gen_ai.usage.output_tokens": 22, "gen_ai.usage.total_tokens": 33, + "sentry.environment": "production", + "sentry.op": "gen_ai.chat", + "sentry.origin": "manual", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": mock.ANY, + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "test-transaction", "thread.id": mock.ANY, "thread.name": mock.ANY, } diff --git a/tests/tracing/test_misc.py b/tests/tracing/test_misc.py index 8895c98dbc..f69e19791a 100644 --- a/tests/tracing/test_misc.py +++ b/tests/tracing/test_misc.py @@ -611,11 +611,11 @@ class TestConversationIdPropagation: """Tests for conversation_id propagation to AI spans.""" def test_conversation_id_propagates_to_span_with_gen_ai_operation_name( - self, sentry_init, capture_events + self, sentry_init, capture_items ): """Span with gen_ai.operation.name data should get conversation_id.""" sentry_init(traces_sample_rate=1.0) - events = capture_events() + items = capture_items("span") scope = sentry_sdk.get_current_scope() scope.set_conversation_id("conv-op-name-test") @@ -624,8 +624,8 @@ def test_conversation_id_propagates_to_span_with_gen_ai_operation_name( with start_span(op="http.client") as span: span.set_data("gen_ai.operation.name", "chat") - (event,) = events - span_data = event["spans"][0]["data"] + spans = [item.payload for item in items if item.type == "span"] + span_data = spans[0]["attributes"] assert span_data.get("gen_ai.conversation.id") == "conv-op-name-test" def test_conversation_id_propagates_to_span_with_ai_op( From fb348bb1037ce1350c714ad3da8ec7b77f79c350 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 17 Apr 2026 13:12:59 +0200 Subject: [PATCH 20/84] openai-agents tests ---
.../openai_agents/test_openai_agents.py | 855 ++++++++++-------- 1 file changed, 470 insertions(+), 385 deletions(-) diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py index 7310e86df5..1c4925915d 100644 --- a/tests/integrations/openai_agents/test_openai_agents.py +++ b/tests/integrations/openai_agents/test_openai_agents.py @@ -160,7 +160,7 @@ def test_agent_custom_model(): @pytest.mark.asyncio async def test_agent_invocation_span_no_pii( sentry_init, - capture_events, + capture_items, test_agent, nonstreaming_responses_model_response, get_model_response, @@ -184,7 +184,7 @@ async def test_agent_invocation_span_no_pii( send_default_pii=False, ) - events = capture_events() + items = capture_items("span", "transaction") result = await agents.Runner.run( agent, "Test input", run_config=test_run_config @@ -193,38 +193,44 @@ async def test_agent_invocation_span_no_pii( assert result is not None assert result.final_output == "Hello, how can I help you?" 
- (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] invoke_agent_span = next( - span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT + ) + ai_client_span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT ) - ai_client_span = next(span for span in spans if span["op"] == OP.GEN_AI_CHAT) + transactions = [item.payload for item in items if item.type == "transaction"] + assert len(transactions) == 1 + transaction = transactions[0] assert transaction["transaction"] == "test_agent workflow" assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" - assert invoke_agent_span["description"] == "invoke_agent test_agent" + assert invoke_agent_span["name"] == "invoke_agent test_agent" - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in invoke_agent_span["data"] - assert "gen_ai.request.messages" not in invoke_agent_span["data"] - assert "gen_ai.response.text" not in invoke_agent_span["data"] + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in invoke_agent_span["attributes"] + assert "gen_ai.request.messages" not in invoke_agent_span["attributes"] + assert "gen_ai.response.text" not in invoke_agent_span["attributes"] - assert invoke_agent_span["data"]["gen_ai.operation.name"] == "invoke_agent" - assert invoke_agent_span["data"]["gen_ai.system"] == "openai" - assert invoke_agent_span["data"]["gen_ai.agent.name"] == "test_agent" - assert invoke_agent_span["data"]["gen_ai.request.max_tokens"] == 100 - assert invoke_agent_span["data"]["gen_ai.request.model"] == "gpt-4" - assert invoke_agent_span["data"]["gen_ai.request.temperature"] == 0.7 - assert invoke_agent_span["data"]["gen_ai.request.top_p"] == 1.0 + assert invoke_agent_span["attributes"]["gen_ai.operation.name"] == "invoke_agent" + assert invoke_agent_span["attributes"]["gen_ai.system"] == "openai" + 
assert invoke_agent_span["attributes"]["gen_ai.agent.name"] == "test_agent" + assert invoke_agent_span["attributes"]["gen_ai.request.max_tokens"] == 100 + assert invoke_agent_span["attributes"]["gen_ai.request.model"] == "gpt-4" + assert invoke_agent_span["attributes"]["gen_ai.request.temperature"] == 0.7 + assert invoke_agent_span["attributes"]["gen_ai.request.top_p"] == 1.0 - assert ai_client_span["description"] == "chat gpt-4" - assert ai_client_span["data"]["gen_ai.operation.name"] == "chat" - assert ai_client_span["data"]["gen_ai.system"] == "openai" - assert ai_client_span["data"]["gen_ai.agent.name"] == "test_agent" - assert ai_client_span["data"]["gen_ai.request.max_tokens"] == 100 - assert ai_client_span["data"]["gen_ai.request.model"] == "gpt-4" - assert ai_client_span["data"]["gen_ai.request.temperature"] == 0.7 - assert ai_client_span["data"]["gen_ai.request.top_p"] == 1.0 + assert ai_client_span["name"] == "chat gpt-4" + assert ai_client_span["attributes"]["gen_ai.operation.name"] == "chat" + assert ai_client_span["attributes"]["gen_ai.system"] == "openai" + assert ai_client_span["attributes"]["gen_ai.agent.name"] == "test_agent" + assert ai_client_span["attributes"]["gen_ai.request.max_tokens"] == 100 + assert ai_client_span["attributes"]["gen_ai.request.model"] == "gpt-4" + assert ai_client_span["attributes"]["gen_ai.request.temperature"] == 0.7 + assert ai_client_span["attributes"]["gen_ai.request.top_p"] == 1.0 @pytest.mark.asyncio @@ -305,7 +311,7 @@ async def test_agent_invocation_span_no_pii( ) async def test_agent_invocation_span( sentry_init, - capture_events, + capture_items, test_agent_with_instructions, nonstreaming_responses_model_response, instructions, @@ -335,7 +341,7 @@ async def test_agent_invocation_span( send_default_pii=True, ) - events = capture_events() + items = capture_items("span", "transaction") result = await agents.Runner.run( agent, @@ -346,28 +352,34 @@ async def test_agent_invocation_span( assert result is not None 
assert result.final_output == "Hello, how can I help you?" - (transaction,) = events - spans = transaction["spans"] - invoke_agent_span, ai_client_span = spans - + transactions = [item.payload for item in items if item.type == "transaction"] + assert len(transactions) == 1 + transaction = transactions[0] assert transaction["transaction"] == "test_agent workflow" assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" - assert invoke_agent_span["description"] == "invoke_agent test_agent" + spans = [item.payload for item in items if item.type == "span"] + invoke_agent_span, ai_client_span = spans + + assert invoke_agent_span["name"] == "invoke_agent test_agent" # Only first case checks "gen_ai.request.messages" until further input handling work. param_id = request.node.callspec.id if "string" in param_id and instructions is None: # type: ignore - assert "gen_ai.system_instructions" not in ai_client_span["data"] + assert "gen_ai.system_instructions" not in ai_client_span["attributes"] - assert invoke_agent_span["data"]["gen_ai.request.messages"] == safe_serialize( + assert invoke_agent_span["attributes"][ + "gen_ai.request.messages" + ] == safe_serialize( [ {"content": [{"text": "Test input", "type": "text"}], "role": "user"}, ] ) elif "string" in param_id: - assert ai_client_span["data"]["gen_ai.system_instructions"] == safe_serialize( + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( [ { "type": "text", @@ -376,13 +388,17 @@ async def test_agent_invocation_span( ] ) elif "blocks_no_type" in param_id and instructions is None: # type: ignore - assert ai_client_span["data"]["gen_ai.system_instructions"] == safe_serialize( + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( [ {"type": "text", "content": "You are a helpful assistant."}, ] ) elif "blocks_no_type" in param_id: - assert ai_client_span["data"]["gen_ai.system_instructions"] == safe_serialize( + assert 
ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( [ { "type": "text", @@ -392,13 +408,17 @@ async def test_agent_invocation_span( ] ) elif "blocks" in param_id and instructions is None: # type: ignore - assert ai_client_span["data"]["gen_ai.system_instructions"] == safe_serialize( + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( [ {"type": "text", "content": "You are a helpful assistant."}, ] ) elif "blocks" in param_id: - assert ai_client_span["data"]["gen_ai.system_instructions"] == safe_serialize( + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( [ { "type": "text", @@ -408,14 +428,18 @@ async def test_agent_invocation_span( ] ) elif "parts_no_type" in param_id and instructions is None: - assert ai_client_span["data"]["gen_ai.system_instructions"] == safe_serialize( + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( [ {"type": "text", "content": "You are a helpful assistant."}, {"type": "text", "content": "Be concise and clear."}, ] ) elif "parts_no_type" in param_id: - assert ai_client_span["data"]["gen_ai.system_instructions"] == safe_serialize( + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( [ { "type": "text", @@ -426,14 +450,18 @@ async def test_agent_invocation_span( ] ) elif instructions is None: # type: ignore - assert ai_client_span["data"]["gen_ai.system_instructions"] == safe_serialize( + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( [ {"type": "text", "content": "You are a helpful assistant."}, {"type": "text", "content": "Be concise and clear."}, ] ) else: - assert ai_client_span["data"]["gen_ai.system_instructions"] == safe_serialize( + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( [ { "type": "text", @@ -445,32 +473,32 @@ async def 
test_agent_invocation_span( ) assert ( - invoke_agent_span["data"]["gen_ai.response.text"] + invoke_agent_span["attributes"]["gen_ai.response.text"] == "Hello, how can I help you?" ) - assert invoke_agent_span["data"]["gen_ai.operation.name"] == "invoke_agent" - assert invoke_agent_span["data"]["gen_ai.system"] == "openai" - assert invoke_agent_span["data"]["gen_ai.agent.name"] == "test_agent" - assert invoke_agent_span["data"]["gen_ai.request.max_tokens"] == 100 - assert invoke_agent_span["data"]["gen_ai.request.model"] == "gpt-4" - assert invoke_agent_span["data"]["gen_ai.request.temperature"] == 0.7 - assert invoke_agent_span["data"]["gen_ai.request.top_p"] == 1.0 + assert invoke_agent_span["attributes"]["gen_ai.operation.name"] == "invoke_agent" + assert invoke_agent_span["attributes"]["gen_ai.system"] == "openai" + assert invoke_agent_span["attributes"]["gen_ai.agent.name"] == "test_agent" + assert invoke_agent_span["attributes"]["gen_ai.request.max_tokens"] == 100 + assert invoke_agent_span["attributes"]["gen_ai.request.model"] == "gpt-4" + assert invoke_agent_span["attributes"]["gen_ai.request.temperature"] == 0.7 + assert invoke_agent_span["attributes"]["gen_ai.request.top_p"] == 1.0 - assert ai_client_span["description"] == "chat gpt-4" - assert ai_client_span["data"]["gen_ai.operation.name"] == "chat" - assert ai_client_span["data"]["gen_ai.system"] == "openai" - assert ai_client_span["data"]["gen_ai.agent.name"] == "test_agent" - assert ai_client_span["data"]["gen_ai.request.max_tokens"] == 100 - assert ai_client_span["data"]["gen_ai.request.model"] == "gpt-4" - assert ai_client_span["data"]["gen_ai.request.temperature"] == 0.7 - assert ai_client_span["data"]["gen_ai.request.top_p"] == 1.0 + assert ai_client_span["name"] == "chat gpt-4" + assert ai_client_span["attributes"]["gen_ai.operation.name"] == "chat" + assert ai_client_span["attributes"]["gen_ai.system"] == "openai" + assert ai_client_span["attributes"]["gen_ai.agent.name"] == "test_agent" + 
assert ai_client_span["attributes"]["gen_ai.request.max_tokens"] == 100 + assert ai_client_span["attributes"]["gen_ai.request.model"] == "gpt-4" + assert ai_client_span["attributes"]["gen_ai.request.temperature"] == 0.7 + assert ai_client_span["attributes"]["gen_ai.request.top_p"] == 1.0 @pytest.mark.asyncio async def test_client_span_custom_model( sentry_init, - capture_events, + capture_items, test_agent_custom_model, nonstreaming_responses_model_response, get_model_response, @@ -497,7 +525,7 @@ async def test_client_span_custom_model( traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction", "span") result = await agents.Runner.run( agent, "Test input", run_config=test_run_config @@ -506,17 +534,18 @@ async def test_client_span_custom_model( assert result is not None assert result.final_output == "Hello, how can I help you?" - (transaction,) = events - spans = transaction["spans"] - ai_client_span = next(span for span in spans if span["op"] == OP.GEN_AI_CHAT) + spans = [item.payload for item in items if item.type == "span"] + ai_client_span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) - assert ai_client_span["description"] == "chat my-custom-model" - assert ai_client_span["data"]["gen_ai.request.model"] == "my-custom-model" + assert ai_client_span["name"] == "chat my-custom-model" + assert ai_client_span["attributes"]["gen_ai.request.model"] == "my-custom-model" def test_agent_invocation_span_sync_no_pii( sentry_init, - capture_events, + capture_items, test_agent, nonstreaming_responses_model_response, get_model_response, @@ -543,42 +572,48 @@ def test_agent_invocation_span_sync_no_pii( send_default_pii=False, ) - events = capture_events() + items = capture_items("span", "transaction") result = agents.Runner.run_sync(agent, "Test input", run_config=test_run_config) assert result is not None assert result.final_output == "Hello, how can I help you?"
- (transaction,) = events - spans = transaction["spans"] - invoke_agent_span = next( - span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT - ) - ai_client_span = next(span for span in spans if span["op"] == OP.GEN_AI_CHAT) - + transactions = [item.payload for item in items if item.type == "transaction"] + assert len(transactions) == 1 + transaction = transactions[0] assert transaction["transaction"] == "test_agent workflow" assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" - assert invoke_agent_span["description"] == "invoke_agent test_agent" - assert invoke_agent_span["data"]["gen_ai.operation.name"] == "invoke_agent" - assert invoke_agent_span["data"]["gen_ai.system"] == "openai" - assert invoke_agent_span["data"]["gen_ai.agent.name"] == "test_agent" - assert invoke_agent_span["data"]["gen_ai.request.max_tokens"] == 100 - assert invoke_agent_span["data"]["gen_ai.request.model"] == "gpt-4" - assert invoke_agent_span["data"]["gen_ai.request.temperature"] == 0.7 - assert invoke_agent_span["data"]["gen_ai.request.top_p"] == 1.0 + spans = [item.payload for item in items if item.type == "span"] + invoke_agent_span = next( + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT + ) + ai_client_span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) + + assert invoke_agent_span["name"] == "invoke_agent test_agent" + assert invoke_agent_span["attributes"]["gen_ai.operation.name"] == "invoke_agent" + assert invoke_agent_span["attributes"]["gen_ai.system"] == "openai" + assert invoke_agent_span["attributes"]["gen_ai.agent.name"] == "test_agent" + assert invoke_agent_span["attributes"]["gen_ai.request.max_tokens"] == 100 + assert invoke_agent_span["attributes"]["gen_ai.request.model"] == "gpt-4" + assert invoke_agent_span["attributes"]["gen_ai.request.temperature"] == 0.7 + assert invoke_agent_span["attributes"]["gen_ai.request.top_p"] == 1.0 - assert 
ai_client_span["description"] == "chat gpt-4" - assert ai_client_span["data"]["gen_ai.operation.name"] == "chat" - assert ai_client_span["data"]["gen_ai.system"] == "openai" - assert ai_client_span["data"]["gen_ai.agent.name"] == "test_agent" - assert ai_client_span["data"]["gen_ai.request.max_tokens"] == 100 - assert ai_client_span["data"]["gen_ai.request.model"] == "gpt-4" - assert ai_client_span["data"]["gen_ai.request.temperature"] == 0.7 - assert ai_client_span["data"]["gen_ai.request.top_p"] == 1.0 + assert ai_client_span["name"] == "chat gpt-4" + assert ai_client_span["attributes"]["gen_ai.operation.name"] == "chat" + assert ai_client_span["attributes"]["gen_ai.system"] == "openai" + assert ai_client_span["attributes"]["gen_ai.agent.name"] == "test_agent" + assert ai_client_span["attributes"]["gen_ai.request.max_tokens"] == 100 + assert ai_client_span["attributes"]["gen_ai.request.model"] == "gpt-4" + assert ai_client_span["attributes"]["gen_ai.request.temperature"] == 0.7 + assert ai_client_span["attributes"]["gen_ai.request.top_p"] == 1.0 - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in invoke_agent_span["data"] + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in invoke_agent_span["attributes"] @pytest.mark.parametrize( @@ -658,7 +693,7 @@ def test_agent_invocation_span_sync_no_pii( ) def test_agent_invocation_span_sync( sentry_init, - capture_events, + capture_items, test_agent_with_instructions, nonstreaming_responses_model_response, instructions, @@ -688,7 +723,7 @@ def test_agent_invocation_span_sync( send_default_pii=True, ) - events = capture_events() + items = capture_items("span", "transaction") result = agents.Runner.run_sync( agent, @@ -699,36 +734,40 @@ def test_agent_invocation_span_sync( assert result is not None assert result.final_output == "Hello, how can I help you?" 
- (transaction,) = events - spans = transaction["spans"] - invoke_agent_span, ai_client_span = spans - + transactions = [item.payload for item in items if item.type == "transaction"] + assert len(transactions) == 1 + transaction = transactions[0] assert transaction["transaction"] == "test_agent workflow" assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" - assert invoke_agent_span["description"] == "invoke_agent test_agent" - assert invoke_agent_span["data"]["gen_ai.operation.name"] == "invoke_agent" - assert invoke_agent_span["data"]["gen_ai.system"] == "openai" - assert invoke_agent_span["data"]["gen_ai.agent.name"] == "test_agent" - assert invoke_agent_span["data"]["gen_ai.request.max_tokens"] == 100 - assert invoke_agent_span["data"]["gen_ai.request.model"] == "gpt-4" - assert invoke_agent_span["data"]["gen_ai.request.temperature"] == 0.7 - assert invoke_agent_span["data"]["gen_ai.request.top_p"] == 1.0 - - assert ai_client_span["description"] == "chat gpt-4" - assert ai_client_span["data"]["gen_ai.operation.name"] == "chat" - assert ai_client_span["data"]["gen_ai.system"] == "openai" - assert ai_client_span["data"]["gen_ai.agent.name"] == "test_agent" - assert ai_client_span["data"]["gen_ai.request.max_tokens"] == 100 - assert ai_client_span["data"]["gen_ai.request.model"] == "gpt-4" - assert ai_client_span["data"]["gen_ai.request.temperature"] == 0.7 - assert ai_client_span["data"]["gen_ai.request.top_p"] == 1.0 + spans = [item.payload for item in items if item.type == "span"] + invoke_agent_span, ai_client_span = spans + + assert invoke_agent_span["name"] == "invoke_agent test_agent" + assert invoke_agent_span["attributes"]["gen_ai.operation.name"] == "invoke_agent" + assert invoke_agent_span["attributes"]["gen_ai.system"] == "openai" + assert invoke_agent_span["attributes"]["gen_ai.agent.name"] == "test_agent" + assert invoke_agent_span["attributes"]["gen_ai.request.max_tokens"] == 100 + assert 
invoke_agent_span["attributes"]["gen_ai.request.model"] == "gpt-4" + assert invoke_agent_span["attributes"]["gen_ai.request.temperature"] == 0.7 + assert invoke_agent_span["attributes"]["gen_ai.request.top_p"] == 1.0 + + assert ai_client_span["name"] == "chat gpt-4" + assert ai_client_span["attributes"]["gen_ai.operation.name"] == "chat" + assert ai_client_span["attributes"]["gen_ai.system"] == "openai" + assert ai_client_span["attributes"]["gen_ai.agent.name"] == "test_agent" + assert ai_client_span["attributes"]["gen_ai.request.max_tokens"] == 100 + assert ai_client_span["attributes"]["gen_ai.request.model"] == "gpt-4" + assert ai_client_span["attributes"]["gen_ai.request.temperature"] == 0.7 + assert ai_client_span["attributes"]["gen_ai.request.top_p"] == 1.0 param_id = request.node.callspec.id if "string" in param_id and instructions is None: # type: ignore - assert "gen_ai.system_instructions" not in ai_client_span["data"] + assert "gen_ai.system_instructions" not in ai_client_span["attributes"] elif "string" in param_id: - assert ai_client_span["data"]["gen_ai.system_instructions"] == safe_serialize( + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( [ { "type": "text", @@ -737,13 +776,17 @@ def test_agent_invocation_span_sync( ] ) elif "blocks_no_type" in param_id and instructions is None: # type: ignore - assert ai_client_span["data"]["gen_ai.system_instructions"] == safe_serialize( + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( [ {"type": "text", "content": "You are a helpful assistant."}, ] ) elif "blocks_no_type" in param_id: - assert ai_client_span["data"]["gen_ai.system_instructions"] == safe_serialize( + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( [ { "type": "text", @@ -753,13 +796,17 @@ def test_agent_invocation_span_sync( ] ) elif "blocks" in param_id and instructions is None: # type: ignore - assert 
ai_client_span["data"]["gen_ai.system_instructions"] == safe_serialize( + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( [ {"type": "text", "content": "You are a helpful assistant."}, ] ) elif "blocks" in param_id: - assert ai_client_span["data"]["gen_ai.system_instructions"] == safe_serialize( + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( [ { "type": "text", @@ -769,14 +816,18 @@ def test_agent_invocation_span_sync( ] ) elif "parts_no_type" in param_id and instructions is None: - assert ai_client_span["data"]["gen_ai.system_instructions"] == safe_serialize( + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( [ {"type": "text", "content": "You are a helpful assistant."}, {"type": "text", "content": "Be concise and clear."}, ] ) elif "parts_no_type" in param_id: - assert ai_client_span["data"]["gen_ai.system_instructions"] == safe_serialize( + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( [ { "type": "text", @@ -787,14 +838,18 @@ def test_agent_invocation_span_sync( ] ) elif instructions is None: # type: ignore - assert ai_client_span["data"]["gen_ai.system_instructions"] == safe_serialize( + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( [ {"type": "text", "content": "You are a helpful assistant."}, {"type": "text", "content": "Be concise and clear."}, ] ) else: - assert ai_client_span["data"]["gen_ai.system_instructions"] == safe_serialize( + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( [ { "type": "text", @@ -807,7 +862,7 @@ def test_agent_invocation_span_sync( @pytest.mark.asyncio -async def test_handoff_span(sentry_init, capture_events, get_model_response): +async def test_handoff_span(sentry_init, capture_items, get_model_response): """ Test that handoff spans are created when agents hand off to 
other agents. """ @@ -910,7 +965,7 @@ async def test_handoff_span(sentry_init, capture_events, get_model_response): traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction", "span") result = await agents.Runner.run( primary_agent, @@ -920,21 +975,22 @@ async def test_handoff_span(sentry_init, capture_events, get_model_response): assert result is not None - (transaction,) = events - spans = transaction["spans"] - handoff_span = next(span for span in spans if span.get("op") == OP.GEN_AI_HANDOFF) + spans = [item.payload for item in items if item.type == "span"] + handoff_span = next( + span + for span in spans + if span["attributes"].get("sentry.op") == OP.GEN_AI_HANDOFF + ) # Verify handoff span was created assert handoff_span is not None - assert ( - handoff_span["description"] == "handoff from primary_agent to secondary_agent" - ) - assert handoff_span["data"]["gen_ai.operation.name"] == "handoff" + assert handoff_span["name"] == "handoff from primary_agent to secondary_agent" + assert handoff_span["attributes"]["gen_ai.operation.name"] == "handoff" @pytest.mark.asyncio async def test_max_turns_before_handoff_span( - sentry_init, capture_events, get_model_response + sentry_init, capture_items, get_model_response ): """ Example raising agents.exceptions.AgentsException after the agent invocation span is complete. 
@@ -1038,7 +1094,7 @@ async def test_max_turns_before_handoff_span( traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction", "span") with pytest.raises(MaxTurnsExceeded): await agents.Runner.run( @@ -1048,22 +1104,23 @@ async def test_max_turns_before_handoff_span( max_turns=1, ) - (error, transaction) = events - spans = transaction["spans"] - handoff_span = next(span for span in spans if span.get("op") == OP.GEN_AI_HANDOFF) + spans = [item.payload for item in items if item.type == "span"] + handoff_span = next( + span + for span in spans + if span["attributes"].get("sentry.op") == OP.GEN_AI_HANDOFF + ) # Verify handoff span was created assert handoff_span is not None - assert ( - handoff_span["description"] == "handoff from primary_agent to secondary_agent" - ) - assert handoff_span["data"]["gen_ai.operation.name"] == "handoff" + assert handoff_span["name"] == "handoff from primary_agent to secondary_agent" + assert handoff_span["attributes"]["gen_ai.operation.name"] == "handoff" @pytest.mark.asyncio async def test_tool_execution_span( sentry_init, - capture_events, + capture_items, test_agent, get_model_response, responses_tool_call_model_responses, @@ -1135,7 +1192,7 @@ def simple_test_tool(message: str) -> str: send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") await agents.Runner.run( agent_with_tool, @@ -1143,13 +1200,26 @@ def simple_test_tool(message: str) -> str: run_config=test_run_config, ) - (transaction,) = events - spans = transaction["spans"] - agent_span = next(span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT) + transactions = [item.payload for item in items if item.type == "transaction"] + assert len(transactions) == 1 + transaction = transactions[0] + assert transaction["transaction"] == "test_agent workflow" + assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" + + spans = [item.payload for item in items if item.type == 
"span"] + agent_span = next( + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT + ) ai_client_span1, ai_client_span2 = ( - span for span in spans if span["op"] == OP.GEN_AI_CHAT + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) + tool_span = next( + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_EXECUTE_TOOL ) - tool_span = next(span for span in spans if span["op"] == OP.GEN_AI_EXECUTE_TOOL) available_tool = { "name": "simple_test_tool", @@ -1189,39 +1259,36 @@ def simple_test_tool(message: str) -> str: } ) - assert transaction["transaction"] == "test_agent workflow" - assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" - - assert agent_span["description"] == "invoke_agent test_agent" - assert agent_span["origin"] == "auto.ai.openai_agents" - assert agent_span["data"]["gen_ai.agent.name"] == "test_agent" - assert agent_span["data"]["gen_ai.operation.name"] == "invoke_agent" + assert agent_span["name"] == "invoke_agent test_agent" + assert agent_span["attributes"]["sentry.origin"] == "auto.ai.openai_agents" + assert agent_span["attributes"]["gen_ai.agent.name"] == "test_agent" + assert agent_span["attributes"]["gen_ai.operation.name"] == "invoke_agent" agent_span_available_tool = json.loads( - agent_span["data"]["gen_ai.request.available_tools"] + agent_span["attributes"]["gen_ai.request.available_tools"] )[0] assert all(agent_span_available_tool[k] == v for k, v in available_tool.items()) - assert agent_span["data"]["gen_ai.request.max_tokens"] == 100 - assert agent_span["data"]["gen_ai.request.model"] == "gpt-4" - assert agent_span["data"]["gen_ai.request.temperature"] == 0.7 - assert agent_span["data"]["gen_ai.request.top_p"] == 1.0 - assert agent_span["data"]["gen_ai.system"] == "openai" + assert agent_span["attributes"]["gen_ai.request.max_tokens"] == 100 + assert agent_span["attributes"]["gen_ai.request.model"] == "gpt-4" + assert 
agent_span["attributes"]["gen_ai.request.temperature"] == 0.7 + assert agent_span["attributes"]["gen_ai.request.top_p"] == 1.0 + assert agent_span["attributes"]["gen_ai.system"] == "openai" - assert ai_client_span1["description"] == "chat gpt-4" - assert ai_client_span1["data"]["gen_ai.operation.name"] == "chat" - assert ai_client_span1["data"]["gen_ai.system"] == "openai" - assert ai_client_span1["data"]["gen_ai.agent.name"] == "test_agent" + assert ai_client_span1["name"] == "chat gpt-4" + assert ai_client_span1["attributes"]["gen_ai.operation.name"] == "chat" + assert ai_client_span1["attributes"]["gen_ai.system"] == "openai" + assert ai_client_span1["attributes"]["gen_ai.agent.name"] == "test_agent" ai_client_span1_available_tool = json.loads( - ai_client_span1["data"]["gen_ai.request.available_tools"] + ai_client_span1["attributes"]["gen_ai.request.available_tools"] )[0] assert all( ai_client_span1_available_tool[k] == v for k, v in available_tool.items() ) - assert ai_client_span1["data"]["gen_ai.request.max_tokens"] == 100 - assert ai_client_span1["data"]["gen_ai.request.messages"] == safe_serialize( + assert ai_client_span1["attributes"]["gen_ai.request.max_tokens"] == 100 + assert ai_client_span1["attributes"]["gen_ai.request.messages"] == safe_serialize( [ { "role": "user", @@ -1231,14 +1298,14 @@ def simple_test_tool(message: str) -> str: }, ] ) - assert ai_client_span1["data"]["gen_ai.request.model"] == "gpt-4" - assert ai_client_span1["data"]["gen_ai.request.temperature"] == 0.7 - assert ai_client_span1["data"]["gen_ai.request.top_p"] == 1.0 - assert ai_client_span1["data"]["gen_ai.usage.input_tokens"] == 10 - assert ai_client_span1["data"]["gen_ai.usage.input_tokens.cached"] == 0 - assert ai_client_span1["data"]["gen_ai.usage.output_tokens"] == 5 - assert ai_client_span1["data"]["gen_ai.usage.output_tokens.reasoning"] == 0 - assert ai_client_span1["data"]["gen_ai.usage.total_tokens"] == 15 + assert ai_client_span1["attributes"]["gen_ai.request.model"] 
== "gpt-4" + assert ai_client_span1["attributes"]["gen_ai.request.temperature"] == 0.7 + assert ai_client_span1["attributes"]["gen_ai.request.top_p"] == 1.0 + assert ai_client_span1["attributes"]["gen_ai.usage.input_tokens"] == 10 + assert ai_client_span1["attributes"]["gen_ai.usage.input_tokens.cached"] == 0 + assert ai_client_span1["attributes"]["gen_ai.usage.output_tokens"] == 5 + assert ai_client_span1["attributes"]["gen_ai.usage.output_tokens.reasoning"] == 0 + assert ai_client_span1["attributes"]["gen_ai.usage.total_tokens"] == 15 tool_call = { "arguments": '{"message": "hello"}', @@ -1252,41 +1319,41 @@ def simple_test_tool(message: str) -> str: if OPENAI_VERSION >= (2, 25, 0): tool_call["namespace"] = None - assert json.loads(ai_client_span1["data"]["gen_ai.response.tool_calls"]) == [ + assert json.loads(ai_client_span1["attributes"]["gen_ai.response.tool_calls"]) == [ tool_call ] - assert tool_span["description"] == "execute_tool simple_test_tool" - assert tool_span["data"]["gen_ai.agent.name"] == "test_agent" - assert tool_span["data"]["gen_ai.operation.name"] == "execute_tool" + assert tool_span["name"] == "execute_tool simple_test_tool" + assert tool_span["attributes"]["gen_ai.agent.name"] == "test_agent" + assert tool_span["attributes"]["gen_ai.operation.name"] == "execute_tool" tool_span_available_tool = json.loads( - tool_span["data"]["gen_ai.request.available_tools"] + tool_span["attributes"]["gen_ai.request.available_tools"] )[0] assert all(tool_span_available_tool[k] == v for k, v in available_tool.items()) - assert tool_span["data"]["gen_ai.request.max_tokens"] == 100 - assert tool_span["data"]["gen_ai.request.model"] == "gpt-4" - assert tool_span["data"]["gen_ai.request.temperature"] == 0.7 - assert tool_span["data"]["gen_ai.request.top_p"] == 1.0 - assert tool_span["data"]["gen_ai.system"] == "openai" - assert tool_span["data"]["gen_ai.tool.description"] == "A simple tool" - assert tool_span["data"]["gen_ai.tool.input"] == '{"message": 
"hello"}' - assert tool_span["data"]["gen_ai.tool.name"] == "simple_test_tool" - assert tool_span["data"]["gen_ai.tool.output"] == "Tool executed with: hello" - assert ai_client_span2["description"] == "chat gpt-4" - assert ai_client_span2["data"]["gen_ai.agent.name"] == "test_agent" - assert ai_client_span2["data"]["gen_ai.operation.name"] == "chat" + assert tool_span["attributes"]["gen_ai.request.max_tokens"] == 100 + assert tool_span["attributes"]["gen_ai.request.model"] == "gpt-4" + assert tool_span["attributes"]["gen_ai.request.temperature"] == 0.7 + assert tool_span["attributes"]["gen_ai.request.top_p"] == 1.0 + assert tool_span["attributes"]["gen_ai.system"] == "openai" + assert tool_span["attributes"]["gen_ai.tool.description"] == "A simple tool" + assert tool_span["attributes"]["gen_ai.tool.input"] == '{"message": "hello"}' + assert tool_span["attributes"]["gen_ai.tool.name"] == "simple_test_tool" + assert tool_span["attributes"]["gen_ai.tool.output"] == "Tool executed with: hello" + assert ai_client_span2["name"] == "chat gpt-4" + assert ai_client_span2["attributes"]["gen_ai.agent.name"] == "test_agent" + assert ai_client_span2["attributes"]["gen_ai.operation.name"] == "chat" ai_client_span2_available_tool = json.loads( - ai_client_span2["data"]["gen_ai.request.available_tools"] + ai_client_span2["attributes"]["gen_ai.request.available_tools"] )[0] assert all( ai_client_span2_available_tool[k] == v for k, v in available_tool.items() ) - assert ai_client_span2["data"]["gen_ai.request.max_tokens"] == 100 - assert ai_client_span2["data"]["gen_ai.request.messages"] == safe_serialize( + assert ai_client_span2["attributes"]["gen_ai.request.max_tokens"] == 100 + assert ai_client_span2["attributes"]["gen_ai.request.messages"] == safe_serialize( [ { "role": "tool", @@ -1300,19 +1367,19 @@ def simple_test_tool(message: str) -> str: }, ] ) - assert ai_client_span2["data"]["gen_ai.request.model"] == "gpt-4" - assert 
ai_client_span2["data"]["gen_ai.request.temperature"] == 0.7 - assert ai_client_span2["data"]["gen_ai.request.top_p"] == 1.0 + assert ai_client_span2["attributes"]["gen_ai.request.model"] == "gpt-4" + assert ai_client_span2["attributes"]["gen_ai.request.temperature"] == 0.7 + assert ai_client_span2["attributes"]["gen_ai.request.top_p"] == 1.0 assert ( - ai_client_span2["data"]["gen_ai.response.text"] + ai_client_span2["attributes"]["gen_ai.response.text"] == "Task completed using the tool" ) - assert ai_client_span2["data"]["gen_ai.system"] == "openai" - assert ai_client_span2["data"]["gen_ai.usage.input_tokens.cached"] == 0 - assert ai_client_span2["data"]["gen_ai.usage.input_tokens"] == 15 - assert ai_client_span2["data"]["gen_ai.usage.output_tokens.reasoning"] == 0 - assert ai_client_span2["data"]["gen_ai.usage.output_tokens"] == 10 - assert ai_client_span2["data"]["gen_ai.usage.total_tokens"] == 25 + assert ai_client_span2["attributes"]["gen_ai.system"] == "openai" + assert ai_client_span2["attributes"]["gen_ai.usage.input_tokens.cached"] == 0 + assert ai_client_span2["attributes"]["gen_ai.usage.input_tokens"] == 15 + assert ai_client_span2["attributes"]["gen_ai.usage.output_tokens.reasoning"] == 0 + assert ai_client_span2["attributes"]["gen_ai.usage.output_tokens"] == 10 + assert ai_client_span2["attributes"]["gen_ai.usage.total_tokens"] == 25 @pytest.mark.asyncio @@ -1570,7 +1637,7 @@ async def test_hosted_mcp_tool_propagation_headers( @pytest.mark.asyncio -async def test_model_behavior_error(sentry_init, capture_events, test_agent): +async def test_model_behavior_error(sentry_init, capture_items, test_agent): """ Example raising agents.exceptions.AgentsException before the agent invocation span is complete. The mocked API response indicates that "wrong_tool" was called. 
@@ -1613,7 +1680,7 @@ def simple_test_tool(message: str) -> str: send_default_pii=True, ) - events = capture_events() + items = capture_items("span", "transaction") with pytest.raises(ModelBehaviorError): await agents.Runner.run( @@ -1622,26 +1689,27 @@ def simple_test_tool(message: str) -> str: run_config=test_run_config, ) - (error, transaction) = events - spans = transaction["spans"] + transactions = [item.payload for item in items if item.type == "transaction"] + assert len(transactions) == 1 + transaction = transactions[0] + assert transaction["transaction"] == "test_agent workflow" + assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" + + spans = [item.payload for item in items if item.type == "span"] ( agent_span, ai_client_span1, ) = spans - assert transaction["transaction"] == "test_agent workflow" - assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" - - assert agent_span["description"] == "invoke_agent test_agent" - assert agent_span["origin"] == "auto.ai.openai_agents" + assert agent_span["name"] == "invoke_agent test_agent" + assert agent_span["attributes"]["sentry.origin"] == "auto.ai.openai_agents" # Error due to unrecognized tool in model response. - assert agent_span["status"] == "internal_error" - assert agent_span["tags"]["status"] == "internal_error" + assert agent_span["status"] == "error" @pytest.mark.asyncio -async def test_error_handling(sentry_init, capture_events, test_agent): +async def test_error_handling(sentry_init, capture_items, test_agent): """ Test error handling in agent execution. 
""" @@ -1660,39 +1728,39 @@ async def test_error_handling(sentry_init, capture_events, test_agent): traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("error", "span", "transaction") with pytest.raises(Exception, match="Model Error"): await agents.Runner.run( test_agent, "Test input", run_config=test_run_config ) - ( - error_event, - transaction, - ) = events - + error_events = [item.payload for item in items if item.type == "event"] + assert len(error_events) == 1 + error_event = error_events[0] assert error_event["exception"]["values"][0]["type"] == "Exception" assert error_event["exception"]["values"][0]["value"] == "Model Error" assert error_event["exception"]["values"][0]["mechanism"]["type"] == "openai_agents" - spans = transaction["spans"] - (invoke_agent_span, ai_client_span) = spans - + transactions = [item.payload for item in items if item.type == "transaction"] + assert len(transactions) == 1 + transaction = transactions[0] assert transaction["transaction"] == "test_agent workflow" assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" - assert invoke_agent_span["description"] == "invoke_agent test_agent" - assert invoke_agent_span["origin"] == "auto.ai.openai_agents" + spans = [item.payload for item in items if item.type == "span"] + (invoke_agent_span, ai_client_span) = spans + + assert invoke_agent_span["name"] == "invoke_agent test_agent" + assert invoke_agent_span["attributes"]["sentry.origin"] == "auto.ai.openai_agents" - assert ai_client_span["description"] == "chat gpt-4" - assert ai_client_span["origin"] == "auto.ai.openai_agents" - assert ai_client_span["status"] == "internal_error" - assert ai_client_span["tags"]["status"] == "internal_error" + assert ai_client_span["name"] == "chat gpt-4" + assert ai_client_span["attributes"]["sentry.origin"] == "auto.ai.openai_agents" + assert ai_client_span["status"] == "error" @pytest.mark.asyncio -async def test_error_captures_input_data(sentry_init, 
capture_events, test_agent): +async def test_error_captures_input_data(sentry_init, capture_items, test_agent): """ Test that input data is captured even when the API call raises an exception. This verifies that _set_input_data is called before the API call. @@ -1725,37 +1793,36 @@ async def test_error_captures_input_data(sentry_init, capture_events, test_agent send_default_pii=True, ) - events = capture_events() + items = capture_items("event", "span") with pytest.raises(InternalServerError, match="Error code: 500"): await agents.Runner.run(agent, "Test input", run_config=test_run_config) - ( - error_event, - transaction, - ) = events - + error_events = [item.payload for item in items if item.type == "event"] + assert len(error_events) == 1 + error_event = error_events[0] assert error_event["exception"]["values"][0]["type"] == "InternalServerError" assert error_event["exception"]["values"][0]["value"] == "Error code: 500" - spans = transaction["spans"] - ai_client_span = [s for s in spans if s["op"] == "gen_ai.chat"][0] + spans = [item.payload for item in items if item.type == "span"] + ai_client_span = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ][0] - assert ai_client_span["description"] == "chat gpt-4" - assert ai_client_span["status"] == "internal_error" - assert ai_client_span["tags"]["status"] == "internal_error" + assert ai_client_span["name"] == "chat gpt-4" + assert ai_client_span["status"] == "error" - assert "gen_ai.request.messages" in ai_client_span["data"] + assert "gen_ai.request.messages" in ai_client_span["attributes"] request_messages = safe_serialize( [ {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, ] ) - assert ai_client_span["data"]["gen_ai.request.messages"] == request_messages + assert ai_client_span["attributes"]["gen_ai.request.messages"] == request_messages @pytest.mark.asyncio -async def test_span_status_error(sentry_init, capture_events, test_agent): +async def 
test_span_status_error(sentry_init, capture_items, test_agent): with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}): with patch( "agents.models.openai_responses.OpenAIResponsesModel.get_response" @@ -1770,23 +1837,26 @@ async def test_span_status_error(sentry_init, capture_events, test_agent): traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("event", "transaction", "span") with pytest.raises(ValueError, match="Model Error"): await agents.Runner.run( test_agent, "Test input", run_config=test_run_config ) - (error, transaction) = events + (error,) = (item.payload for item in items if item.type == "event") assert error["level"] == "error" - assert transaction["spans"][0]["status"] == "internal_error" - assert transaction["spans"][0]["tags"]["status"] == "internal_error" - assert transaction["contexts"]["trace"]["status"] == "internal_error" + + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["status"] == "error" + + (transaction,) = (item.payload for item in items if item.type == "transaction") + assert transaction["contexts"]["trace"]["status"] == "error" @pytest.mark.asyncio async def test_mcp_tool_execution_spans( - sentry_init, capture_events, test_agent, get_model_response + sentry_init, capture_items, test_agent, get_model_response ): """ Test that MCP (Model Context Protocol) tool calls create execute_tool spans. 
@@ -1880,7 +1950,7 @@ async def test_mcp_tool_execution_spans( send_default_pii=True, ) - events = capture_events() + items = capture_items("span", "transaction") await agents.Runner.run( agent, @@ -1888,33 +1958,35 @@ async def test_mcp_tool_execution_spans( run_config=test_run_config, ) - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] # Find the MCP execute_tool span mcp_tool_span = None for span in spans: - if span.get("description") == "execute_tool test_mcp_tool": + if span.get("name") == "execute_tool test_mcp_tool": mcp_tool_span = span break # Verify the MCP tool span was created assert mcp_tool_span is not None, "MCP execute_tool span was not created" - assert mcp_tool_span["description"] == "execute_tool test_mcp_tool" - assert mcp_tool_span["data"]["gen_ai.tool.name"] == "test_mcp_tool" - assert mcp_tool_span["data"]["gen_ai.tool.input"] == '{"query": "search term"}' + assert mcp_tool_span["name"] == "execute_tool test_mcp_tool" + assert mcp_tool_span["attributes"]["gen_ai.tool.name"] == "test_mcp_tool" assert ( - mcp_tool_span["data"]["gen_ai.tool.output"] == "MCP tool executed successfully" + mcp_tool_span["attributes"]["gen_ai.tool.input"] == '{"query": "search term"}' + ) + assert ( + mcp_tool_span["attributes"]["gen_ai.tool.output"] + == "MCP tool executed successfully" ) # Verify no error status since error was None - assert mcp_tool_span.get("status") != "internal_error" - assert mcp_tool_span.get("tags", {}).get("status") != "internal_error" + assert mcp_tool_span.get("status") != "error" + assert "tags" not in mcp_tool_span @pytest.mark.asyncio async def test_mcp_tool_execution_with_error( - sentry_init, capture_events, test_agent, get_model_response + sentry_init, capture_items, test_agent, get_model_response ): """ Test that MCP tool calls with errors are tracked with error status. 
@@ -2008,7 +2080,7 @@ async def test_mcp_tool_execution_with_error( send_default_pii=True, ) - events = capture_events() + items = capture_items("span", "transaction") await agents.Runner.run( agent, @@ -2016,31 +2088,29 @@ async def test_mcp_tool_execution_with_error( run_config=test_run_config, ) - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] # Find the MCP execute_tool span with error mcp_tool_span = None for span in spans: - if span.get("description") == "execute_tool failing_mcp_tool": + if span.get("name") == "execute_tool failing_mcp_tool": mcp_tool_span = span break # Verify the MCP tool span was created with error status assert mcp_tool_span is not None, "MCP execute_tool span was not created" - assert mcp_tool_span["description"] == "execute_tool failing_mcp_tool" - assert mcp_tool_span["data"]["gen_ai.tool.name"] == "failing_mcp_tool" - assert mcp_tool_span["data"]["gen_ai.tool.input"] == '{"query": "test"}' - assert mcp_tool_span["data"]["gen_ai.tool.output"] is None + assert mcp_tool_span["name"] == "execute_tool failing_mcp_tool" + assert mcp_tool_span["attributes"]["gen_ai.tool.name"] == "failing_mcp_tool" + assert mcp_tool_span["attributes"]["gen_ai.tool.input"] == '{"query": "test"}' + assert mcp_tool_span["attributes"]["gen_ai.tool.output"] is None # Verify error status was set - assert mcp_tool_span["status"] == "internal_error" - assert mcp_tool_span["tags"]["status"] == "internal_error" + assert mcp_tool_span["status"] == "error" @pytest.mark.asyncio async def test_mcp_tool_execution_without_pii( - sentry_init, capture_events, test_agent, get_model_response + sentry_init, capture_items, test_agent, get_model_response ): """ Test that MCP tool input/output are not included when send_default_pii is False. 
@@ -2134,7 +2204,7 @@ async def test_mcp_tool_execution_without_pii( send_default_pii=False, # PII disabled ) - events = capture_events() + items = capture_items("span", "transaction") await agents.Runner.run( agent, @@ -2142,30 +2212,29 @@ async def test_mcp_tool_execution_without_pii( run_config=test_run_config, ) - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] # Find the MCP execute_tool span mcp_tool_span = None for span in spans: - if span.get("description") == "execute_tool test_mcp_tool": + if span.get("name") == "execute_tool test_mcp_tool": mcp_tool_span = span break # Verify the MCP tool span was created but without input/output assert mcp_tool_span is not None, "MCP execute_tool span was not created" - assert mcp_tool_span["description"] == "execute_tool test_mcp_tool" - assert mcp_tool_span["data"]["gen_ai.tool.name"] == "test_mcp_tool" + assert mcp_tool_span["name"] == "execute_tool test_mcp_tool" + assert mcp_tool_span["attributes"]["gen_ai.tool.name"] == "test_mcp_tool" # Verify input and output are not included when send_default_pii is False - assert "gen_ai.tool.input" not in mcp_tool_span["data"] - assert "gen_ai.tool.output" not in mcp_tool_span["data"] + assert "gen_ai.tool.input" not in mcp_tool_span["attributes"] + assert "gen_ai.tool.output" not in mcp_tool_span["attributes"] @pytest.mark.asyncio async def test_multiple_agents_asyncio( sentry_init, - capture_events, + capture_items, test_agent, nonstreaming_responses_model_response, get_model_response, @@ -2192,7 +2261,7 @@ async def test_multiple_agents_asyncio( traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("span", "transaction") async def run(): await agents.Runner.run( @@ -2203,14 +2272,10 @@ async def run(): await asyncio.gather(*[run() for _ in range(3)]) - assert len(events) == 3 - txn1, txn2, txn3 = events + txn1, txn2, txn3 = (item.payload for item in items if item.type == 
"transaction") - assert txn1["type"] == "transaction" assert txn1["transaction"] == "test_agent workflow" - assert txn2["type"] == "transaction" assert txn2["transaction"] == "test_agent workflow" - assert txn3["type"] == "transaction" assert txn3["transaction"] == "test_agent workflow" @@ -2230,7 +2295,7 @@ async def run(): ], ) def test_openai_agents_message_role_mapping( - sentry_init, capture_events, test_message, expected_role + sentry_init, capture_items, test_message, expected_role ): """Test that OpenAI Agents integration properly maps message roles like 'ai' to 'assistant'""" sentry_init( @@ -2259,7 +2324,7 @@ def test_openai_agents_message_role_mapping( @pytest.mark.asyncio async def test_tool_execution_error_tracing( sentry_init, - capture_events, + capture_items, test_agent, get_model_response, responses_tool_call_model_responses, @@ -2338,7 +2403,7 @@ def failing_tool(message: str) -> str: send_default_pii=True, ) - events = capture_events() + items = capture_items("span", "transaction") # Note: The agents library catches tool exceptions internally, # so we don't expect this to raise @@ -2348,13 +2413,12 @@ def failing_tool(message: str) -> str: run_config=test_run_config, ) - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] # Find the execute_tool span execute_tool_span = None for span in spans: - description = span.get("description", "") + description = span.get("name", "") if description is not None and description.startswith( "execute_tool failing_tool" ): @@ -2363,19 +2427,18 @@ def failing_tool(message: str) -> str: # Verify the execute_tool span was created assert execute_tool_span is not None, "execute_tool span was not created" - assert execute_tool_span["description"] == "execute_tool failing_tool" - assert execute_tool_span["data"]["gen_ai.tool.name"] == "failing_tool" + assert execute_tool_span["name"] == "execute_tool failing_tool" + assert 
execute_tool_span["attributes"]["gen_ai.tool.name"] == "failing_tool" # Verify error status was set (this is the key test for our patch) # The span should be marked as error because the tool execution failed - assert execute_tool_span["status"] == "internal_error" - assert execute_tool_span["tags"]["status"] == "internal_error" + assert execute_tool_span["status"] == "error" @pytest.mark.asyncio async def test_invoke_agent_span_includes_usage_data( sentry_init, - capture_events, + capture_items, test_agent, get_model_response, ): @@ -2437,7 +2500,7 @@ async def test_invoke_agent_span_includes_usage_data( send_default_pii=True, ) - events = capture_events() + items = capture_items("span", "transaction") result = await agents.Runner.run( agent, "Test input", run_config=test_run_config @@ -2445,29 +2508,30 @@ async def test_invoke_agent_span_includes_usage_data( assert result is not None - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] invoke_agent_span = next( - span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT ) # Verify invoke_agent span has usage data from context_wrapper - assert invoke_agent_span["description"] == "invoke_agent test_agent" - assert "gen_ai.usage.input_tokens" in invoke_agent_span["data"] - assert "gen_ai.usage.output_tokens" in invoke_agent_span["data"] - assert "gen_ai.usage.total_tokens" in invoke_agent_span["data"] + assert invoke_agent_span["name"] == "invoke_agent test_agent" + assert "gen_ai.usage.input_tokens" in invoke_agent_span["attributes"] + assert "gen_ai.usage.output_tokens" in invoke_agent_span["attributes"] + assert "gen_ai.usage.total_tokens" in invoke_agent_span["attributes"] - assert invoke_agent_span["data"]["gen_ai.usage.input_tokens"] == 10 - assert invoke_agent_span["data"]["gen_ai.usage.output_tokens"] == 20 - assert 
invoke_agent_span["data"]["gen_ai.usage.total_tokens"] == 30 - assert invoke_agent_span["data"]["gen_ai.usage.input_tokens.cached"] == 0 - assert invoke_agent_span["data"]["gen_ai.usage.output_tokens.reasoning"] == 5 + assert invoke_agent_span["attributes"]["gen_ai.usage.input_tokens"] == 10 + assert invoke_agent_span["attributes"]["gen_ai.usage.output_tokens"] == 20 + assert invoke_agent_span["attributes"]["gen_ai.usage.total_tokens"] == 30 + assert invoke_agent_span["attributes"]["gen_ai.usage.input_tokens.cached"] == 0 + assert invoke_agent_span["attributes"]["gen_ai.usage.output_tokens.reasoning"] == 5 @pytest.mark.asyncio async def test_ai_client_span_includes_response_model( sentry_init, - capture_events, + capture_items, test_agent, get_model_response, ): @@ -2529,7 +2593,7 @@ async def test_ai_client_span_includes_response_model( send_default_pii=True, ) - events = capture_events() + items = capture_items("span", "transaction") result = await agents.Runner.run( agent, "Test input", run_config=test_run_config @@ -2537,20 +2601,21 @@ async def test_ai_client_span_includes_response_model( assert result is not None - (transaction,) = events - spans = transaction["spans"] - ai_client_span = next(span for span in spans if span["op"] == OP.GEN_AI_CHAT) + spans = [item.payload for item in items if item.type == "span"] + ai_client_span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) # Verify ai_client span has response model from API response - assert ai_client_span["description"] == "chat gpt-4" - assert "gen_ai.response.model" in ai_client_span["data"] - assert ai_client_span["data"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" + assert ai_client_span["name"] == "chat gpt-4" + assert "gen_ai.response.model" in ai_client_span["attributes"] + assert ai_client_span["attributes"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" @pytest.mark.asyncio async def test_ai_client_span_response_model_with_chat_completions( 
sentry_init, - capture_events, + capture_items, get_model_response, ): """ @@ -2616,7 +2681,7 @@ async def test_ai_client_span_response_model_with_chat_completions( traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("span", "transaction") result = await agents.Runner.run( agent, "Test input", run_config=test_run_config @@ -2624,18 +2689,22 @@ async def test_ai_client_span_response_model_with_chat_completions( assert result is not None - (transaction,) = events - spans = transaction["spans"] - ai_client_span = next(span for span in spans if span["op"] == OP.GEN_AI_CHAT) + spans = [item.payload for item in items if item.type == "span"] + ai_client_span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) # Verify response model from API response is captured - assert "gen_ai.response.model" in ai_client_span["data"] - assert ai_client_span["data"]["gen_ai.response.model"] == "gpt-4o-mini-2024-07-18" + assert "gen_ai.response.model" in ai_client_span["attributes"] + assert ( + ai_client_span["attributes"]["gen_ai.response.model"] + == "gpt-4o-mini-2024-07-18" + ) @pytest.mark.asyncio async def test_multiple_llm_calls_aggregate_usage( - sentry_init, capture_events, test_agent, get_model_response + sentry_init, capture_items, test_agent, get_model_response ): """ Test that invoke_agent spans show aggregated usage across multiple LLM calls @@ -2734,7 +2803,7 @@ def calculator(a: int, b: int) -> int: send_default_pii=True, ) - events = capture_events() + items = capture_items("span", "transaction") result = await agents.Runner.run( agent_with_tool, @@ -2744,25 +2813,24 @@ def calculator(a: int, b: int) -> int: assert result is not None - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] invoke_agent_span = spans[0] # Verify invoke_agent span has aggregated usage from both API calls # Total: 10 + 20 = 30 input tokens, 5 + 15 = 20 output 
tokens, 15 + 35 = 50 total - assert invoke_agent_span["data"]["gen_ai.usage.input_tokens"] == 30 - assert invoke_agent_span["data"]["gen_ai.usage.output_tokens"] == 20 - assert invoke_agent_span["data"]["gen_ai.usage.total_tokens"] == 50 + assert invoke_agent_span["attributes"]["gen_ai.usage.input_tokens"] == 30 + assert invoke_agent_span["attributes"]["gen_ai.usage.output_tokens"] == 20 + assert invoke_agent_span["attributes"]["gen_ai.usage.total_tokens"] == 50 # Cached tokens should be aggregated: 0 + 5 = 5 - assert invoke_agent_span["data"]["gen_ai.usage.input_tokens.cached"] == 5 + assert invoke_agent_span["attributes"]["gen_ai.usage.input_tokens.cached"] == 5 # Reasoning tokens should be aggregated: 0 + 3 = 3 - assert invoke_agent_span["data"]["gen_ai.usage.output_tokens.reasoning"] == 3 + assert invoke_agent_span["attributes"]["gen_ai.usage.output_tokens.reasoning"] == 3 @pytest.mark.asyncio async def test_invoke_agent_span_includes_response_model( sentry_init, - capture_events, + capture_items, test_agent, get_model_response, ): @@ -2823,7 +2891,7 @@ async def test_invoke_agent_span_includes_response_model( send_default_pii=True, ) - events = capture_events() + items = capture_items("span", "transaction") result = await agents.Runner.run( agent, "Test input", run_config=test_run_config @@ -2831,27 +2899,32 @@ async def test_invoke_agent_span_includes_response_model( assert result is not None - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] invoke_agent_span = next( - span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT + ) + ai_client_span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT ) - ai_client_span = next(span for span in spans if span["op"] == OP.GEN_AI_CHAT) # Verify invoke_agent span has response model from API - assert 
invoke_agent_span["description"] == "invoke_agent test_agent" - assert "gen_ai.response.model" in invoke_agent_span["data"] - assert invoke_agent_span["data"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" + assert invoke_agent_span["name"] == "invoke_agent test_agent" + assert "gen_ai.response.model" in invoke_agent_span["attributes"] + assert ( + invoke_agent_span["attributes"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" + ) # Also verify ai_client span has it - assert "gen_ai.response.model" in ai_client_span["data"] - assert ai_client_span["data"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" + assert "gen_ai.response.model" in ai_client_span["attributes"] + assert ai_client_span["attributes"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" @pytest.mark.asyncio async def test_invoke_agent_span_uses_last_response_model( sentry_init, - capture_events, + capture_items, test_agent, get_model_response, ): @@ -2952,7 +3025,7 @@ def calculator(a: int, b: int) -> int: send_default_pii=True, ) - events = capture_events() + items = capture_items("span", "transaction") result = await agents.Runner.run( agent_with_tool, @@ -2962,24 +3035,26 @@ def calculator(a: int, b: int) -> int: assert result is not None - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] invoke_agent_span = spans[0] first_ai_client_span = spans[1] second_ai_client_span = spans[3] # After tool span # Invoke_agent span uses the LAST response model - assert "gen_ai.response.model" in invoke_agent_span["data"] - assert invoke_agent_span["data"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" + assert "gen_ai.response.model" in invoke_agent_span["attributes"] + assert ( + invoke_agent_span["attributes"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" + ) # Each ai_client span has its own response model from the API - assert first_ai_client_span["data"]["gen_ai.response.model"] == "gpt-4-0613" + assert 
first_ai_client_span["attributes"]["gen_ai.response.model"] == "gpt-4-0613" assert ( - second_ai_client_span["data"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" + second_ai_client_span["attributes"]["gen_ai.response.model"] + == "gpt-4.1-2025-04-14" ) -def test_openai_agents_message_truncation(sentry_init, capture_events): +def test_openai_agents_message_truncation(sentry_init, capture_items): """Test that large messages are truncated properly in OpenAI Agents integration.""" large_content = ( @@ -3230,7 +3305,7 @@ async def test_streaming_ttft_on_chat_span( @pytest.mark.asyncio async def test_conversation_id_on_all_spans( sentry_init, - capture_events, + capture_items, test_agent, nonstreaming_responses_model_response, get_model_response, @@ -3257,7 +3332,7 @@ async def test_conversation_id_on_all_spans( traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("span", "transaction") result = await agents.Runner.run( agent, @@ -3268,24 +3343,28 @@ async def test_conversation_id_on_all_spans( assert result is not None - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] invoke_agent_span = next( - span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT + ) + ai_client_span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT ) - ai_client_span = next(span for span in spans if span["op"] == OP.GEN_AI_CHAT) # Verify workflow span (transaction) has conversation_id + (transaction,) = (item.payload for item in items if item.type == "transaction") assert ( transaction["contexts"]["trace"]["data"]["gen_ai.conversation.id"] == "conv_test_123" ) # Verify invoke_agent span has conversation_id - assert invoke_agent_span["data"]["gen_ai.conversation.id"] == "conv_test_123" + assert invoke_agent_span["attributes"]["gen_ai.conversation.id"] == 
"conv_test_123" # Verify ai_client span has conversation_id - assert ai_client_span["data"]["gen_ai.conversation.id"] == "conv_test_123" + assert ai_client_span["attributes"]["gen_ai.conversation.id"] == "conv_test_123" @pytest.mark.skipif( @@ -3294,7 +3373,7 @@ async def test_conversation_id_on_all_spans( ) @pytest.mark.asyncio async def test_conversation_id_on_tool_span( - sentry_init, capture_events, test_agent, get_model_response + sentry_init, capture_items, test_agent, get_model_response ): """ Test that gen_ai.conversation.id is set on tool execution spans when passed to Runner.run(). @@ -3391,7 +3470,7 @@ def simple_tool(message: str) -> str: traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("span", "transaction") await agents.Runner.run( agent_with_tool, @@ -3400,21 +3479,20 @@ def simple_tool(message: str) -> str: conversation_id="conv_tool_test_456", ) - (transaction,) = events - spans = transaction["spans"] - + spans = [item.payload for item in items if item.type == "span"] # Find the tool span tool_span = None for span in spans: - if span.get("description", "").startswith("execute_tool"): + if span.get("name", "").startswith("execute_tool"): tool_span = span break assert tool_span is not None # Tool span should have the conversation_id passed to Runner.run() - assert tool_span["data"]["gen_ai.conversation.id"] == "conv_tool_test_456" + assert tool_span["attributes"]["gen_ai.conversation.id"] == "conv_tool_test_456" # Workflow span (transaction) should have the same conversation_id + (transaction,) = (item.payload for item in items if item.type == "transaction") assert ( transaction["contexts"]["trace"]["data"]["gen_ai.conversation.id"] == "conv_tool_test_456" @@ -3428,7 +3506,7 @@ def simple_tool(message: str) -> str: @pytest.mark.asyncio async def test_no_conversation_id_when_not_provided( sentry_init, - capture_events, + capture_items, test_agent, nonstreaming_responses_model_response, get_model_response, @@ -3455,7 
+3533,7 @@ async def test_no_conversation_id_when_not_provided( traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("span", "transaction") # Don't pass conversation_id result = await agents.Runner.run( @@ -3464,16 +3542,23 @@ async def test_no_conversation_id_when_not_provided( assert result is not None - (transaction,) = events - spans = transaction["spans"] + transactions = [item.payload for item in items if item.type == "transaction"] + assert len(transactions) == 1 + transaction = transactions[0] + + spans = [item.payload for item in items if item.type == "span"] invoke_agent_span = next( - span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT + ) + ai_client_span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT ) - ai_client_span = next(span for span in spans if span["op"] == OP.GEN_AI_CHAT) # Verify conversation_id is NOT set on any spans assert "gen_ai.conversation.id" not in transaction["contexts"]["trace"].get( - "data", {} + "attributes", {} ) - assert "gen_ai.conversation.id" not in invoke_agent_span.get("data", {}) - assert "gen_ai.conversation.id" not in ai_client_span.get("data", {}) + assert "gen_ai.conversation.id" not in invoke_agent_span.get("attributes", {}) + assert "gen_ai.conversation.id" not in ai_client_span.get("attributes", {}) From 41e409d73164807c557a0ee7563bdd1655f56d83 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 17 Apr 2026 13:46:26 +0200 Subject: [PATCH 21/84] fix openai-agents tests --- tests/integrations/openai_agents/test_openai_agents.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py index 1c4925915d..294812b0ca 100644 --- a/tests/integrations/openai_agents/test_openai_agents.py +++ 
b/tests/integrations/openai_agents/test_openai_agents.py @@ -525,7 +525,7 @@ async def test_client_span_custom_model( traces_sample_rate=1.0, ) - items = capture_items("transaction", "spans") + items = capture_items("span") result = await agents.Runner.run( agent, "Test input", run_config=test_run_config @@ -1728,7 +1728,7 @@ async def test_error_handling(sentry_init, capture_items, test_agent): traces_sample_rate=1.0, ) - items = capture_items("error", "span", "transaction") + items = capture_items("event", "span", "transaction") with pytest.raises(Exception, match="Model Error"): await agents.Runner.run( @@ -1793,7 +1793,7 @@ async def test_error_captures_input_data(sentry_init, capture_items, test_agent) send_default_pii=True, ) - items = capture_items("error", "span") + items = capture_items("event", "span") with pytest.raises(InternalServerError, match="Error code: 500"): await agents.Runner.run(agent, "Test input", run_config=test_run_config) @@ -1851,7 +1851,7 @@ async def test_span_status_error(sentry_init, capture_items, test_agent): assert spans[0]["status"] == "error" (transaction,) = (item.payload for item in items if item.type == "transaction") - assert transaction["contexts"]["trace"]["status"] == "error" + assert transaction["contexts"]["trace"]["status"] == "internal_error" @pytest.mark.asyncio @@ -2102,7 +2102,7 @@ async def test_mcp_tool_execution_with_error( assert mcp_tool_span["name"] == "execute_tool failing_mcp_tool" assert mcp_tool_span["attributes"]["gen_ai.tool.name"] == "failing_mcp_tool" assert mcp_tool_span["attributes"]["gen_ai.tool.input"] == '{"query": "test"}' - assert mcp_tool_span["attributes"]["gen_ai.tool.output"] is None + assert mcp_tool_span["attributes"]["gen_ai.tool.output"] == "None" # Verify error status was set assert mcp_tool_span["status"] == "error" From 8bf77f0ed1b351923f1c6fa5956437a952f75c9d Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 17 Apr 2026 13:51:40 +0200 Subject: [PATCH 22/84] fix 
common tests --- tests/tracing/test_decorator.py | 51 ++++++++++++++++++++++----------- 1 file changed, 35 insertions(+), 16 deletions(-) diff --git a/tests/tracing/test_decorator.py b/tests/tracing/test_decorator.py index e73323138a..bbb7e85b1a 100644 --- a/tests/tracing/test_decorator.py +++ b/tests/tracing/test_decorator.py @@ -170,7 +170,6 @@ def my_agent(): item.payload for item in items if item.type == "span" ) - assert agent_span["attributes"]["sentry.op"] == "gen_ai.invoke_agent" assert ( agent_span["name"] == "invoke_agent test_decorator.test_span_templates_ai_dicts..my_agent" @@ -190,7 +189,6 @@ def my_agent(): "thread.name": mock.ANY, } - assert tool_span["attributes"]["sentry.op"] == "gen_ai.execute_tool" assert ( tool_span["name"] == "execute_tool test_decorator.test_span_templates_ai_dicts..my_tool" @@ -214,7 +212,6 @@ def my_agent(): } assert "gen_ai.tool.description" not in tool_span["attributes"] - assert chat_span["attributes"]["sentry.op"] == "gen_ai.chat" assert chat_span["name"] == "chat my-gpt-4o-mini" assert chat_span["attributes"] == { "gen_ai.operation.name": "chat", @@ -243,9 +240,9 @@ def my_agent(): } -def test_span_templates_ai_objects(sentry_init, capture_events): +def test_span_templates_ai_objects(sentry_init, capture_items): sentry_init(traces_sample_rate=1.0) - events = capture_events() + items = capture_items("span") @sentry_sdk.trace(template=SPANTEMPLATE.AI_TOOL) def my_tool(arg1, arg2): @@ -292,40 +289,54 @@ def my_agent(): with sentry_sdk.start_transaction(name="test-transaction"): my_agent() - (event,) = events - (agent_span, tool_span, chat_span) = event["spans"] + (agent_span, tool_span, chat_span) = ( + item.payload for item in items if item.type == "span" + ) - assert agent_span["op"] == "gen_ai.invoke_agent" assert ( - agent_span["description"] + agent_span["name"] == "invoke_agent test_decorator.test_span_templates_ai_objects..my_agent" ) - assert agent_span["data"] == { + assert agent_span["attributes"] == { 
"gen_ai.agent.name": "test_decorator.test_span_templates_ai_objects..my_agent", "gen_ai.operation.name": "invoke_agent", + "sentry.environment": "production", + "sentry.op": "gen_ai.invoke_agent", + "sentry.origin": "manual", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": mock.ANY, + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "test-transaction", "thread.id": mock.ANY, "thread.name": mock.ANY, } - assert tool_span["op"] == "gen_ai.execute_tool" assert ( - tool_span["description"] + tool_span["name"] == "execute_tool test_decorator.test_span_templates_ai_objects..my_tool" ) - assert tool_span["data"] == { + assert tool_span["attributes"] == { "gen_ai.tool.name": "test_decorator.test_span_templates_ai_objects..my_tool", "gen_ai.tool.description": "This is a tool function.", "gen_ai.operation.name": "execute_tool", "gen_ai.usage.input_tokens": 10, "gen_ai.usage.output_tokens": 20, "gen_ai.usage.total_tokens": 30, + "sentry.environment": "production", + "sentry.op": "gen_ai.execute_tool", + "sentry.origin": "manual", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": mock.ANY, + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "test-transaction", "thread.id": mock.ANY, "thread.name": mock.ANY, } - assert chat_span["op"] == "gen_ai.chat" - assert chat_span["description"] == "chat my-gpt-4o-mini" - assert chat_span["data"] == { + assert chat_span["name"] == "chat my-gpt-4o-mini" + assert chat_span["attributes"] == { "gen_ai.operation.name": "chat", "gen_ai.request.frequency_penalty": 1.0, "gen_ai.request.max_tokens": 100, @@ -339,6 +350,14 @@ def my_agent(): "gen_ai.usage.input_tokens": 11, "gen_ai.usage.output_tokens": 22, "gen_ai.usage.total_tokens": 33, + "sentry.environment": "production", + "sentry.op": "gen_ai.chat", + "sentry.origin": "manual", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": mock.ANY, + 
"sentry.segment.id": mock.ANY, + "sentry.segment.name": "test-transaction", "thread.id": mock.ANY, "thread.name": mock.ANY, } From 7c3da4fdab771be2ae50dc741156951230d88c83 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 17 Apr 2026 13:52:39 +0200 Subject: [PATCH 23/84] client handle None --- sentry_sdk/client.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sentry_sdk/client.py b/sentry_sdk/client.py index 99e58ec499..356b68e254 100644 --- a/sentry_sdk/client.py +++ b/sentry_sdk/client.py @@ -149,6 +149,12 @@ def _serialized_v1_attribute_to_serialized_v2_attribute( "type": "string", } + if attribute_value is None: + return { + "value": "None", + "type": "string", + } + return None From 06c2a40a6dd723e0a1ed0e6ee7166efe4068e179 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 17 Apr 2026 13:54:05 +0200 Subject: [PATCH 24/84] fix item_count --- sentry_sdk/client.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/sentry_sdk/client.py b/sentry_sdk/client.py index 356b68e254..0d13b6db03 100644 --- a/sentry_sdk/client.py +++ b/sentry_sdk/client.py @@ -1126,23 +1126,21 @@ def capture_event( event_opt["spans"] = non_gen_ai_spans envelope.add_transaction(event_opt) + converted_gen_ai_spans = [ + _serialized_v1_span_to_serialized_v2_span(span, event) + for span in gen_ai_spans + if isinstance(span, dict) + ] + envelope.add_item( Item( type=SpanBatcher.TYPE, content_type=SpanBatcher.CONTENT_TYPE, headers={ - "item_count": len(gen_ai_spans), + "item_count": len(converted_gen_ai_spans), }, payload=PayloadRef( - json={ - "items": [ - _serialized_v1_span_to_serialized_v2_span( - span, event - ) - for span in gen_ai_spans - if isinstance(span, dict) - ] - }, + json={"items": converted_gen_ai_spans}, ), ) ) From 204b9809f6efa06aad3b9f1914d169d1c677e286 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 17 Apr 2026 14:02:48 +0200 Subject: [PATCH 25/84] fix common tests --- 
tests/tracing/test_decorator.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/tests/tracing/test_decorator.py b/tests/tracing/test_decorator.py index bbb7e85b1a..5f5adec2cb 100644 --- a/tests/tracing/test_decorator.py +++ b/tests/tracing/test_decorator.py @@ -364,9 +364,9 @@ def my_agent(): @pytest.mark.parametrize("send_default_pii", [True, False]) -def test_span_templates_ai_pii(sentry_init, capture_events, send_default_pii): +def test_span_templates_ai_pii(sentry_init, capture_items, send_default_pii): sentry_init(traces_sample_rate=1.0, send_default_pii=send_default_pii) - events = capture_events() + items = capture_items("span") @sentry_sdk.trace(template=SPANTEMPLATE.AI_TOOL) def my_tool(arg1, arg2, **kwargs): @@ -396,15 +396,14 @@ def my_agent(*args, **kwargs): with sentry_sdk.start_transaction(name="test-transaction"): my_agent(22, 33, arg1=44, arg2=55) - (event,) = events - (_, tool_span, _) = event["spans"] + (_, tool_span, _) = (item.payload for item in items if item.type == "span") if send_default_pii: assert ( - tool_span["data"]["gen_ai.tool.input"] + tool_span["attributes"]["gen_ai.tool.input"] == "{'args': (1, 2), 'kwargs': {'tool_arg1': '3', 'tool_arg2': '4'}}" ) - assert tool_span["data"]["gen_ai.tool.output"] == "'tool_output'" + assert tool_span["attributes"]["gen_ai.tool.output"] == "'tool_output'" else: - assert "gen_ai.tool.input" not in tool_span["data"] - assert "gen_ai.tool.output" not in tool_span["data"] + assert "gen_ai.tool.input" not in tool_span["attributes"] + assert "gen_ai.tool.output" not in tool_span["attributes"] From 00733f960e239bb4a4c606580bc0e9a05f97ec42 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 17 Apr 2026 14:08:15 +0200 Subject: [PATCH 26/84] fix common tests --- tests/tracing/test_misc.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/tracing/test_misc.py b/tests/tracing/test_misc.py index f69e19791a..bb8d942335 100644 --- 
a/tests/tracing/test_misc.py +++ b/tests/tracing/test_misc.py @@ -625,15 +625,15 @@ def test_conversation_id_propagates_to_span_with_gen_ai_operation_name( span.set_data("gen_ai.operation.name", "chat") spans = [item.payload for item in items if item.type == "span"] - span_data = spans[0]["data"] + span_data = spans[0]["attributes"] assert span_data.get("gen_ai.conversation.id") == "conv-op-name-test" def test_conversation_id_propagates_to_span_with_ai_op( - self, sentry_init, capture_events + self, sentry_init, capture_items ): """Span with ai.* op should get conversation_id.""" sentry_init(traces_sample_rate=1.0) - events = capture_events() + items = capture_items("span") scope = sentry_sdk.get_current_scope() scope.set_conversation_id("conv-ai-op-test") @@ -642,8 +642,8 @@ def test_conversation_id_propagates_to_span_with_ai_op( with start_span(op="ai.chat.completions"): pass - (event,) = events - span_data = event["spans"][0]["data"] + spans = [item.payload for item in items if item.type == "span"] + span_data = spans[0]["attributes"] assert span_data.get("gen_ai.conversation.id") == "conv-ai-op-test" def test_conversation_id_propagates_to_span_with_gen_ai_op( From a54cab4ce7b94624f5de991a1615e632da71f5f9 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 17 Apr 2026 14:16:23 +0200 Subject: [PATCH 27/84] common tests --- tests/tracing/test_misc.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/tracing/test_misc.py b/tests/tracing/test_misc.py index bb8d942335..8895c98dbc 100644 --- a/tests/tracing/test_misc.py +++ b/tests/tracing/test_misc.py @@ -611,11 +611,11 @@ class TestConversationIdPropagation: """Tests for conversation_id propagation to AI spans.""" def test_conversation_id_propagates_to_span_with_gen_ai_operation_name( - self, sentry_init, capture_items + self, sentry_init, capture_events ): """Span with gen_ai.operation.name data should get conversation_id.""" sentry_init(traces_sample_rate=1.0) - 
items = capture_items("span") + events = capture_events() scope = sentry_sdk.get_current_scope() scope.set_conversation_id("conv-op-name-test") @@ -624,16 +624,16 @@ def test_conversation_id_propagates_to_span_with_gen_ai_operation_name( with start_span(op="http.client") as span: span.set_data("gen_ai.operation.name", "chat") - spans = [item.payload for item in items if item.type == "span"] - span_data = spans[0]["attributes"] + (event,) = events + span_data = event["spans"][0]["data"] assert span_data.get("gen_ai.conversation.id") == "conv-op-name-test" def test_conversation_id_propagates_to_span_with_ai_op( - self, sentry_init, capture_items + self, sentry_init, capture_events ): """Span with ai.* op should get conversation_id.""" sentry_init(traces_sample_rate=1.0) - items = capture_items("span") + events = capture_events() scope = sentry_sdk.get_current_scope() scope.set_conversation_id("conv-ai-op-test") @@ -642,8 +642,8 @@ def test_conversation_id_propagates_to_span_with_ai_op( with start_span(op="ai.chat.completions"): pass - spans = [item.payload for item in items if item.type == "span"] - span_data = spans[0]["attributes"] + (event,) = events + span_data = event["spans"][0]["data"] assert span_data.get("gen_ai.conversation.id") == "conv-ai-op-test" def test_conversation_id_propagates_to_span_with_gen_ai_op( From 4b0c47b28f8a4bf62de2e3a0a9d888ba908fe1b8 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 17 Apr 2026 14:24:18 +0200 Subject: [PATCH 28/84] tests --- tests/tracing/test_misc.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/tracing/test_misc.py b/tests/tracing/test_misc.py index 8895c98dbc..0188b08a88 100644 --- a/tests/tracing/test_misc.py +++ b/tests/tracing/test_misc.py @@ -647,11 +647,11 @@ def test_conversation_id_propagates_to_span_with_ai_op( assert span_data.get("gen_ai.conversation.id") == "conv-ai-op-test" def test_conversation_id_propagates_to_span_with_gen_ai_op( - self, sentry_init, 
capture_events + self, sentry_init, capture_items ): """Span with gen_ai.* op should get conversation_id.""" sentry_init(traces_sample_rate=1.0) - events = capture_events() + items = capture_items("span") scope = sentry_sdk.get_current_scope() scope.set_conversation_id("conv-gen-ai-op-test") @@ -660,8 +660,8 @@ def test_conversation_id_propagates_to_span_with_gen_ai_op( with start_span(op="gen_ai.invoke_agent"): pass - (event,) = events - span_data = event["spans"][0]["data"] + spans = [item.payload for item in items if item.type == "span"] + span_data = spans[0]["attributes"] assert span_data.get("gen_ai.conversation.id") == "conv-gen-ai-op-test" def test_conversation_id_not_propagated_to_non_ai_span( From 6c5c812faa8879523fb4f90c650327a7f70a1d81 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 17 Apr 2026 14:46:25 +0200 Subject: [PATCH 29/84] add experimental v2 option --- .../integrations/anthropic/test_anthropic.py | 53 ++++++++++++ .../google_genai/test_google_genai.py | 37 ++++++++ .../huggingface_hub/test_huggingface_hub.py | 8 ++ .../integrations/langchain/test_langchain.py | 26 ++++++ tests/integrations/litellm/test_litellm.py | 28 ++++++ tests/integrations/openai/test_openai.py | 42 +++++++++ .../openai_agents/test_openai_agents.py | 32 +++++++ .../pydantic_ai/test_pydantic_ai.py | 85 +++++++++++++++++++ 8 files changed, 311 insertions(+) diff --git a/tests/integrations/anthropic/test_anthropic.py b/tests/integrations/anthropic/test_anthropic.py index c7fc280b6c..aedab1578b 100644 --- a/tests/integrations/anthropic/test_anthropic.py +++ b/tests/integrations/anthropic/test_anthropic.py @@ -97,6 +97,7 @@ def test_nonstreaming_create_message( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -171,6 +172,7 @@ async def 
test_nonstreaming_create_message_async( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = AsyncAnthropic(api_key="z") @@ -287,6 +289,7 @@ def test_streaming_create_message( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -395,6 +398,7 @@ def test_streaming_create_message_close( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -498,6 +502,7 @@ def test_streaming_create_message_api_error( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -614,6 +619,7 @@ def test_stream_messages( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -723,6 +729,7 @@ def test_stream_messages_close( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -831,6 +838,7 @@ def test_stream_messages_api_error( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -953,6 +961,7 @@ async def test_streaming_create_message_async( traces_sample_rate=1.0, default_integrations=False, 
send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1064,6 +1073,7 @@ async def test_streaming_create_message_async_close( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1170,6 +1180,7 @@ async def test_streaming_create_message_async_api_error( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1290,6 +1301,7 @@ async def test_stream_message_async( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1400,6 +1412,7 @@ async def test_stream_messages_async_api_error( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1510,6 +1523,7 @@ async def test_stream_messages_async_close( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1666,6 +1680,7 @@ def test_streaming_create_message_with_input_json_delta( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1815,6 +1830,7 @@ def test_stream_messages_with_input_json_delta( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + 
_experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1972,6 +1988,7 @@ async def test_streaming_create_message_with_input_json_delta_async( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2129,6 +2146,7 @@ async def test_stream_message_with_input_json_delta_async( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2188,6 +2206,7 @@ async def test_stream_message_with_input_json_delta_async( def test_exception_message_create(sentry_init, capture_items): sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) + _experiments = ({"gen_ai_as_v2_spans": True},) items = capture_items("event", "transaction") client = Anthropic(api_key="z") @@ -2210,6 +2229,7 @@ def test_exception_message_create(sentry_init, capture_items): def test_span_status_error(sentry_init, capture_items): sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) + _experiments = ({"gen_ai_as_v2_spans": True},) items = capture_items("event", "span") with start_transaction(name="anthropic"): @@ -2236,6 +2256,7 @@ def test_span_status_error(sentry_init, capture_items): @pytest.mark.asyncio async def test_span_status_error_async(sentry_init, capture_items): sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) + _experiments = ({"gen_ai_as_v2_spans": True},) items = capture_items("event", "span") with start_transaction(name="anthropic"): @@ -2262,6 +2283,7 @@ async def test_span_status_error_async(sentry_init, capture_items): @pytest.mark.asyncio async def test_exception_message_create_async(sentry_init, capture_items): 
sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) + _experiments = ({"gen_ai_as_v2_spans": True},) items = capture_items("event", "transaction") client = AsyncAnthropic(api_key="z") @@ -2286,6 +2308,7 @@ def test_span_origin(sentry_init, capture_items): sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2316,6 +2339,7 @@ async def test_span_origin_async(sentry_init, capture_items): sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2379,6 +2403,7 @@ def test_set_output_data_with_input_json_delta(sentry_init): integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) with start_transaction(name="test"): @@ -2429,6 +2454,7 @@ def test_anthropic_message_role_mapping( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2475,6 +2501,7 @@ def test_anthropic_message_truncation(sentry_init, capture_items): integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2525,6 +2552,7 @@ async def test_anthropic_message_truncation_async(sentry_init, capture_items): integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2585,6 +2613,7 @@ def test_nonstreaming_create_message_with_system_prompt( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, 
send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -2671,6 +2700,7 @@ async def test_nonstreaming_create_message_with_system_prompt_async( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = AsyncAnthropic(api_key="z") @@ -2800,6 +2830,7 @@ def test_streaming_create_message_with_system_prompt( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2930,6 +2961,7 @@ def test_stream_messages_with_system_prompt( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -3062,6 +3094,7 @@ async def test_stream_message_with_system_prompt_async( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -3194,6 +3227,7 @@ async def test_streaming_create_message_with_system_prompt_async( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -3269,6 +3303,7 @@ def test_system_prompt_with_complex_structure(sentry_init, capture_items): integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") 
client = Anthropic(api_key="z") @@ -3522,6 +3557,7 @@ def test_message_with_base64_image(sentry_init, capture_items): integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -3572,6 +3608,7 @@ def test_message_with_url_image(sentry_init, capture_items): integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -3615,6 +3652,7 @@ def test_message_with_file_image(sentry_init, capture_items): integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -3659,6 +3697,7 @@ def test_message_with_base64_pdf(sentry_init, capture_items): integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -3703,6 +3742,7 @@ def test_message_with_url_pdf(sentry_init, capture_items): integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -3746,6 +3786,7 @@ def test_message_with_file_document(sentry_init, capture_items): integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -3790,6 +3831,7 @@ def test_message_with_mixed_content(sentry_init, 
capture_items): integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -3872,6 +3914,7 @@ def test_message_with_multiple_images_different_formats(sentry_init, capture_ite integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -3946,6 +3989,7 @@ def test_binary_content_not_stored_when_pii_disabled(sentry_init, capture_items) integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=False, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -3984,6 +4028,7 @@ def test_binary_content_not_stored_when_prompts_disabled(sentry_init, capture_it integrations=[AnthropicIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -4019,6 +4064,7 @@ def test_binary_content_not_stored_when_prompts_disabled(sentry_init, capture_it def test_cache_tokens_nonstreaming(sentry_init, capture_items): """Test cache read/write tokens are tracked for non-streaming responses.""" sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) + _experiments = ({"gen_ai_as_v2_spans": True},) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -4067,6 +4113,7 @@ def test_input_tokens_include_cache_write_nonstreaming(sentry_init, capture_item cache_creation_input_tokens=2846, cache_read_input_tokens=0) """ sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) + _experiments = ({"gen_ai_as_v2_spans": True},) items = 
capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -4115,6 +4162,7 @@ def test_input_tokens_include_cache_read_nonstreaming(sentry_init, capture_items cache_creation_input_tokens=0, cache_read_input_tokens=2846) """ sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) + _experiments = ({"gen_ai_as_v2_spans": True},) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -4192,6 +4240,7 @@ def test_input_tokens_include_cache_read_streaming( ) sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) + _experiments = ({"gen_ai_as_v2_spans": True},) items = capture_items("transaction", "span") with mock.patch.object( @@ -4258,6 +4307,7 @@ def test_stream_messages_input_tokens_include_cache_read_streaming( ) sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) + _experiments = ({"gen_ai_as_v2_spans": True},) items = capture_items("transaction", "span") with mock.patch.object( @@ -4291,6 +4341,7 @@ def test_input_tokens_unchanged_without_caching(sentry_init, capture_items): Usage(input_tokens=20, output_tokens=12) """ sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) + _experiments = ({"gen_ai_as_v2_spans": True},) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -4359,6 +4410,7 @@ def test_cache_tokens_streaming( ) sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) + _experiments = ({"gen_ai_as_v2_spans": True},) items = capture_items("transaction", "span") with mock.patch.object( @@ -4419,6 +4471,7 @@ def test_stream_messages_cache_tokens( ) sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) + _experiments = ({"gen_ai_as_v2_spans": True},) items = capture_items("transaction", "span") with mock.patch.object( diff --git a/tests/integrations/google_genai/test_google_genai.py b/tests/integrations/google_genai/test_google_genai.py index e074b79c8c..ae31fe565b 100644 
--- a/tests/integrations/google_genai/test_google_genai.py +++ b/tests/integrations/google_genai/test_google_genai.py @@ -130,6 +130,7 @@ def test_nonstreaming_generate_content( integrations=[GoogleGenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -219,6 +220,7 @@ def test_generate_content_with_system_instruction( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -262,6 +264,7 @@ def test_generate_content_with_tools(sentry_init, capture_items, mock_genai_clie sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -344,6 +347,7 @@ def test_tool_execution(sentry_init, capture_items): integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -380,6 +384,7 @@ def test_error_handling(sentry_init, capture_items, mock_genai_client): sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event", "transaction") @@ -411,6 +416,7 @@ def test_streaming_generate_content(sentry_init, capture_items, mock_genai_clien integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -527,6 +533,7 @@ def test_span_origin(sentry_init, capture_items, mock_genai_client): sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -554,6 +561,7 @@ 
def test_response_without_usage_metadata(sentry_init, capture_items, mock_genai_ sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -595,6 +603,7 @@ def test_multiple_candidates(sentry_init, capture_items, mock_genai_client): integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -659,6 +668,7 @@ def test_all_configuration_parameters(sentry_init, capture_items, mock_genai_cli sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -698,6 +708,7 @@ def test_empty_response(sentry_init, capture_items, mock_genai_client): sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -729,6 +740,7 @@ def test_response_with_different_id_fields( sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -771,6 +783,7 @@ def test_tool_with_async_function(sentry_init): sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) # Create an async tool function @@ -793,6 +806,7 @@ def test_contents_as_none(sentry_init, capture_items, mock_genai_client): integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -819,6 +833,7 @@ def test_tool_calls_extraction(sentry_init, capture_items, mock_genai_client): sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") 
@@ -905,6 +920,7 @@ def test_google_genai_message_truncation(sentry_init, capture_items, mock_genai_ integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -980,6 +996,7 @@ def test_embed_content( integrations=[GoogleGenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1041,6 +1058,7 @@ def test_embed_content_string_input(sentry_init, capture_items, mock_genai_clien integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1087,6 +1105,7 @@ def test_embed_content_error_handling(sentry_init, capture_items, mock_genai_cli sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "event") @@ -1120,6 +1139,7 @@ def test_embed_content_without_statistics( sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1159,6 +1179,7 @@ def test_embed_content_span_origin(sentry_init, capture_items, mock_genai_client sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1199,6 +1220,7 @@ async def test_async_embed_content( integrations=[GoogleGenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1263,6 +1285,7 @@ async def test_async_embed_content_string_input( 
integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1312,6 +1335,7 @@ async def test_async_embed_content_error_handling( sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "event") @@ -1346,6 +1370,7 @@ async def test_async_embed_content_without_statistics( sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1388,6 +1413,7 @@ async def test_async_embed_content_span_origin( sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1419,6 +1445,7 @@ def test_generate_content_with_content_object( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1455,6 +1482,7 @@ def test_generate_content_with_dict_format( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1487,6 +1515,7 @@ def test_generate_content_with_file_data(sentry_init, capture_items, mock_genai_ integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1536,6 +1565,7 @@ def test_generate_content_with_inline_data( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1581,6 +1611,7 @@ def 
test_generate_content_with_function_response( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1635,6 +1666,7 @@ def test_generate_content_with_mixed_string_and_content( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1678,6 +1710,7 @@ def test_generate_content_with_part_object_directly( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1716,6 +1749,7 @@ def test_generate_content_with_list_of_dicts( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1752,6 +1786,7 @@ def test_generate_content_with_dict_inline_data( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1796,6 +1831,7 @@ def test_generate_content_without_parts_property_inline_data( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1839,6 +1875,7 @@ def test_generate_content_without_parts_property_inline_data_and_binary_data_wit integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") diff --git a/tests/integrations/huggingface_hub/test_huggingface_hub.py b/tests/integrations/huggingface_hub/test_huggingface_hub.py index 98abbb00fa..16c27b678d 100644 --- 
a/tests/integrations/huggingface_hub/test_huggingface_hub.py +++ b/tests/integrations/huggingface_hub/test_huggingface_hub.py @@ -480,6 +480,7 @@ def test_text_generation( traces_sample_rate=1.0, send_default_pii=send_default_pii, integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -555,6 +556,7 @@ def test_text_generation_streaming( traces_sample_rate=1.0, send_default_pii=send_default_pii, integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -631,6 +633,7 @@ def test_chat_completion( traces_sample_rate=1.0, send_default_pii=send_default_pii, integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -709,6 +712,7 @@ def test_chat_completion_streaming( traces_sample_rate=1.0, send_default_pii=send_default_pii, integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -780,6 +784,7 @@ def test_chat_completion_api_error( sentry_init: "Any", capture_items: "Any", mock_hf_api_with_errors: "Any" ) -> None: sentry_init(traces_sample_rate=1.0) + _experiments = ({"gen_ai_as_v2_spans": True},) items = capture_items("event", "transaction", "span") client = get_hf_provider_inference_client() @@ -839,6 +844,7 @@ def test_span_status_error( sentry_init: "Any", capture_items: "Any", mock_hf_api_with_errors: "Any" ) -> None: sentry_init(traces_sample_rate=1.0) + _experiments = ({"gen_ai_as_v2_spans": True},) items = capture_items("event", "transaction", "span") client = get_hf_provider_inference_client() @@ -881,6 +887,7 @@ def test_chat_completion_with_tools( traces_sample_rate=1.0, send_default_pii=send_default_pii, 
integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -976,6 +983,7 @@ def test_chat_completion_streaming_with_tools( traces_sample_rate=1.0, send_default_pii=send_default_pii, integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index f709d12129..5002d050b9 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -108,6 +108,7 @@ def test_langchain_text_completion( ], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -216,6 +217,7 @@ def test_langchain_create_agent( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -336,6 +338,7 @@ def test_tool_execution_span( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -528,6 +531,7 @@ def test_langchain_openai_tools_agent( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -865,6 +869,7 @@ def test_langchain_error(sentry_init, capture_items): integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event", "transaction", "span") @@ -903,6 +908,7 @@ def test_span_status_error(sentry_init, capture_items): sentry_init( integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, + 
_experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event", "transaction", "span") @@ -988,6 +994,7 @@ def _identifying_params(self): return {} sentry_init(integrations=[LangchainIntegration()]) + _experiments = ({"gen_ai_as_v2_spans": True},) # Create a manual SentryLangchainCallback manual_callback = SentryLangchainCallback( @@ -1028,6 +1035,7 @@ def test_langchain_callback_manager(sentry_init): sentry_init( integrations=[LangchainIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) local_manager = BaseCallbackManager(handlers=[]) @@ -1060,6 +1068,7 @@ def test_langchain_callback_manager_with_sentry_callback(sentry_init): sentry_init( integrations=[LangchainIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) sentry_callback = SentryLangchainCallback(0, False) local_manager = BaseCallbackManager(handlers=[sentry_callback]) @@ -1092,6 +1101,7 @@ def test_langchain_callback_list(sentry_init): sentry_init( integrations=[LangchainIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) local_callbacks = [] @@ -1124,6 +1134,7 @@ def test_langchain_callback_list_existing_callback(sentry_init): sentry_init( integrations=[LangchainIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) sentry_callback = SentryLangchainCallback(0, False) local_callbacks = [sentry_callback] @@ -1161,6 +1172,7 @@ def test_langchain_message_role_mapping(sentry_init, capture_items): integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1298,6 +1310,7 @@ def test_langchain_message_truncation(sentry_init, capture_items): integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", 
"span") @@ -1390,6 +1403,7 @@ def test_langchain_embeddings_sync( integrations=[LangchainIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1468,6 +1482,7 @@ def test_langchain_embeddings_embed_query( integrations=[LangchainIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1542,6 +1557,7 @@ async def test_langchain_embeddings_async( integrations=[LangchainIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1614,6 +1630,7 @@ async def test_langchain_embeddings_aembed_query(sentry_init, capture_items): integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1675,6 +1692,7 @@ def test_langchain_embeddings_no_model_name(sentry_init, capture_items): sentry_init( integrations=[LangchainIntegration(include_prompts=False)], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1724,6 +1742,7 @@ def test_langchain_embeddings_integration_disabled(sentry_init, capture_items): # Initialize without LangchainIntegration sentry_init(traces_sample_rate=1.0) + _experiments = ({"gen_ai_as_v2_spans": True},) items = capture_items("transaction", "span") with mock.patch.object( @@ -1760,6 +1779,7 @@ def test_langchain_embeddings_multiple_providers(sentry_init, capture_items): integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = 
capture_items("transaction", "span") @@ -1817,6 +1837,7 @@ def test_langchain_embeddings_error_handling(sentry_init, capture_items): integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1857,6 +1878,7 @@ def test_langchain_embeddings_multiple_calls(sentry_init, capture_items): integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1920,6 +1942,7 @@ def test_langchain_embeddings_span_hierarchy(sentry_init, capture_items): integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1973,6 +1996,7 @@ def test_langchain_embeddings_with_list_and_string_inputs(sentry_init, capture_i integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2037,6 +2061,7 @@ def test_langchain_response_model_extraction( integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2342,6 +2367,7 @@ def test_langchain_ai_system_detection( sentry_init( integrations=[LangchainIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") diff --git a/tests/integrations/litellm/test_litellm.py b/tests/integrations/litellm/test_litellm.py index 90807744e7..b9365e7008 100644 --- a/tests/integrations/litellm/test_litellm.py +++ b/tests/integrations/litellm/test_litellm.py @@ -152,6 +152,7 @@ def 
test_nonstreaming_chat_completion( integrations=[LiteLLMIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -233,6 +234,7 @@ async def test_async_nonstreaming_chat_completion( integrations=[LiteLLMIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -316,6 +318,7 @@ def test_streaming_chat_completion( integrations=[LiteLLMIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -386,6 +389,7 @@ async def test_async_streaming_chat_completion( integrations=[LiteLLMIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -452,6 +456,7 @@ def test_embeddings_create( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -521,6 +526,7 @@ async def test_async_embeddings_create( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -585,6 +591,7 @@ def test_embeddings_create_with_list_input( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -647,6 +654,7 @@ async def test_async_embeddings_create_with_list_input( integrations=[LiteLLMIntegration(include_prompts=True)], 
traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -709,6 +717,7 @@ def test_embeddings_no_pii( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=False, # PII disabled + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -765,6 +774,7 @@ async def test_async_embeddings_no_pii( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=False, # PII disabled + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -815,6 +825,7 @@ def test_exception_handling( sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event") @@ -853,6 +864,7 @@ async def test_async_exception_handling( sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event") @@ -894,6 +906,7 @@ def test_span_origin( sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -941,6 +954,7 @@ def test_multiple_providers( sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction") @@ -1036,6 +1050,7 @@ async def test_async_multiple_providers( sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1132,6 +1147,7 @@ def test_additional_parameters( sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1191,6 +1207,7 @@ async def 
test_async_additional_parameters( sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1250,6 +1267,7 @@ def test_no_integration( """Test that when integration is not enabled, callbacks don't break.""" sentry_init( traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1296,6 +1314,7 @@ async def test_async_no_integration( """Test that when integration is not enabled, callbacks don't break.""" sentry_init( traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1338,6 +1357,7 @@ def test_response_without_usage(sentry_init, capture_items): sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1379,6 +1399,7 @@ def test_integration_setup(sentry_init): sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) # Check that callbacks are registered @@ -1393,6 +1414,7 @@ def test_litellm_message_truncation(sentry_init, capture_items): integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1459,6 +1481,7 @@ def test_binary_content_encoding_image_url( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1538,6 +1561,7 @@ async def test_async_binary_content_encoding_image_url( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") 
@@ -1618,6 +1642,7 @@ def test_binary_content_encoding_mixed_content( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1686,6 +1711,7 @@ async def test_async_binary_content_encoding_mixed_content( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1755,6 +1781,7 @@ def test_binary_content_encoding_uri_type( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1828,6 +1855,7 @@ async def test_async_binary_content_encoding_uri_type( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index e53f8e4f55..4c7df84b8b 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -138,6 +138,7 @@ def test_nonstreaming_chat_completion_no_prompts( integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -233,6 +234,7 @@ def test_nonstreaming_chat_completion(sentry_init, capture_items, messages, requ integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -312,6 +314,7 @@ async def test_nonstreaming_chat_completion_async_no_prompts( 
integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -407,6 +410,7 @@ async def test_nonstreaming_chat_completion_async( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -502,6 +506,7 @@ def test_streaming_chat_completion_no_prompts( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -621,6 +626,7 @@ def test_streaming_chat_completion_with_usage_in_stream( integrations=[OpenAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=False, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -701,6 +707,7 @@ def test_streaming_chat_completion_empty_content_preserves_token_usage( integrations=[OpenAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=False, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -764,6 +771,7 @@ async def test_streaming_chat_completion_empty_content_preserves_token_usage_asy integrations=[OpenAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=False, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -829,6 +837,7 @@ async def test_streaming_chat_completion_async_with_usage_in_stream( integrations=[OpenAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=False, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -957,6 +966,7 @@ def test_streaming_chat_completion( ], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1109,6 +1119,7 @@ async def 
test_streaming_chat_completion_async_no_prompts( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1280,6 +1291,7 @@ async def test_streaming_chat_completion_async( ], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1412,6 +1424,6 @@ async def test_streaming_chat_completion_async( def test_bad_chat_completion(sentry_init, capture_items): - sentry_init(integrations=[OpenAIIntegration()], traces_sample_rate=1.0) + sentry_init(integrations=[OpenAIIntegration()], traces_sample_rate=1.0, _experiments={"gen_ai_as_v2_spans": True}) items = capture_items("event") client = OpenAI(api_key="z") @@ -1430,6 +1442,6 @@ def test_bad_chat_completion(sentry_init, capture_items): def test_span_status_error(sentry_init, capture_items): - sentry_init(integrations=[OpenAIIntegration()], traces_sample_rate=1.0) + sentry_init(integrations=[OpenAIIntegration()], traces_sample_rate=1.0, _experiments={"gen_ai_as_v2_spans": True}) items = capture_items("event", "transaction", "span") with start_transaction(name="test"): @@ -1455,6 +1467,6 @@ def test_span_status_error(sentry_init, capture_items): @pytest.mark.asyncio async def test_bad_chat_completion_async(sentry_init, capture_items): - sentry_init(integrations=[OpenAIIntegration()], traces_sample_rate=1.0) + sentry_init(integrations=[OpenAIIntegration()], traces_sample_rate=1.0, _experiments={"gen_ai_as_v2_spans": True}) items = capture_items("event") client = AsyncOpenAI(api_key="z") @@ -1485,6 +1497,7 @@ def test_embeddings_create_no_pii( integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1567,6 +1580,7 @@ def test_embeddings_create(sentry_init, capture_items, input, request): integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1638,6 +1652,7 @@ async def test_embeddings_create_async_no_pii(
integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1721,6 +1739,7 @@ async def test_embeddings_create_async(sentry_init, capture_items, input, reques integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1789,6 +1808,7 @@ def test_embeddings_create_raises_error( integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event") @@ -1817,6 +1837,7 @@ async def test_embeddings_create_raises_error_async( integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event") @@ -1837,6 +1858,7 @@ def test_span_origin_nonstreaming_chat(sentry_init, capture_items): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1860,6 +1882,7 @@ async def test_span_origin_nonstreaming_chat_async(sentry_init, capture_items): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1882,6 +1905,7 @@ def test_span_origin_streaming_chat(sentry_init, capture_items): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1945,6 +1969,7 @@ async def test_span_origin_streaming_chat_async( sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) 
items = capture_items("transaction", "span") @@ -2011,6 +2036,7 @@ def test_span_origin_embeddings(sentry_init, capture_items): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2042,6 +2068,7 @@ async def test_span_origin_embeddings_async(sentry_init, capture_items): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2435,6 +2462,7 @@ def test_ai_client_span_responses_api_no_pii(sentry_init, capture_items): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -2557,6 +2585,7 @@ def test_ai_client_span_responses_api( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -2767,6 +2796,7 @@ def test_error_in_responses_api(sentry_init, capture_items): integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event", "transaction", "span") @@ -2873,6 +2903,7 @@ async def test_ai_client_span_responses_async_api( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -3158,6 +3189,7 @@ async def test_ai_client_span_streaming_responses_async_api( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -3383,6 +3415,7 @@ async def test_error_in_responses_async_api(sentry_init, capture_items): integrations=[OpenAIIntegration(include_prompts=True)], 
traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event", "transaction", "span") @@ -3510,6 +3543,7 @@ def test_streaming_responses_api( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -3586,6 +3620,7 @@ async def test_streaming_responses_api_async( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -3649,6 +3684,7 @@ def test_empty_tools_in_chat_completion(sentry_init, capture_items, tools): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -3691,6 +3727,7 @@ def test_openai_message_role_mapping( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -3721,6 +3758,7 @@ def test_openai_message_truncation(sentry_init, capture_items): integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -3770,6 +3808,7 @@ def test_streaming_chat_completion_ttft( sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -3848,6 +3887,7 @@ async def test_streaming_chat_completion_ttft_async( sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -3924,6 +3964,7 @@ def test_streaming_responses_api_ttft( sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -3973,6 +4014,7 @@ async def 
test_streaming_responses_api_ttft_async( sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py index 294812b0ca..9e74848a04 100644 --- a/tests/integrations/openai_agents/test_openai_agents.py +++ b/tests/integrations/openai_agents/test_openai_agents.py @@ -182,6 +182,7 @@ async def test_agent_invocation_span_no_pii( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=False, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -339,6 +340,7 @@ async def test_agent_invocation_span( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -523,6 +525,7 @@ async def test_client_span_custom_model( sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -570,6 +573,7 @@ def test_agent_invocation_span_sync_no_pii( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=False, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -721,6 +725,7 @@ def test_agent_invocation_span_sync( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -963,6 +968,7 @@ async def test_handoff_span(sentry_init, capture_items, get_model_response): sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1092,6 +1098,7 @@ async def test_max_turns_before_handoff_span( 
sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1190,6 +1197,7 @@ def simple_test_tool(message: str) -> str: integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1418,6 +1426,7 @@ async def test_hosted_mcp_tool_propagation_header_streamed( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, release="d08ebdb9309e1b004c6f52202de58a09c2268e42", + _experiments={"gen_ai_as_v2_spans": True}, ) request_headers = {} @@ -1580,6 +1589,7 @@ async def test_hosted_mcp_tool_propagation_headers( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, release="d08ebdb9309e1b004c6f52202de58a09c2268e42", + _experiments={"gen_ai_as_v2_spans": True}, ) response = get_model_response(EXAMPLE_RESPONSE, serialize_pydantic=True) @@ -1678,6 +1688,7 @@ def simple_test_tool(message: str) -> str: integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -1726,6 +1737,7 @@ async def test_error_handling(sentry_init, capture_items, test_agent): LoggingIntegration(event_level=logging.CRITICAL), ], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event", "span", "transaction") @@ -1791,6 +1803,7 @@ async def test_error_captures_input_data(sentry_init, capture_items, test_agent) ], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event", "span") @@ -1835,6 +1848,7 @@ async def test_span_status_error(sentry_init, capture_items, test_agent): LoggingIntegration(event_level=logging.CRITICAL), ], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = 
capture_items("event", "transaction", "span") @@ -1948,6 +1962,7 @@ async def test_mcp_tool_execution_spans( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -2078,6 +2093,7 @@ async def test_mcp_tool_execution_with_error( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -2202,6 +2218,7 @@ async def test_mcp_tool_execution_without_pii( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=False, # PII disabled + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -2259,6 +2276,7 @@ async def test_multiple_agents_asyncio( sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -2302,6 +2320,7 @@ def test_openai_agents_message_role_mapping( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) get_response_kwargs = {"input": [test_message]} @@ -2401,6 +2420,7 @@ def failing_tool(message: str) -> str: integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -2498,6 +2518,7 @@ async def test_invoke_agent_span_includes_usage_data( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -2591,6 +2612,7 @@ async def test_ai_client_span_includes_response_model( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, 
) items = capture_items("span", "transaction") @@ -2679,6 +2701,7 @@ async def test_ai_client_span_response_model_with_chat_completions( sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -2801,6 +2824,7 @@ def calculator(a: int, b: int) -> int: integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -2889,6 +2913,7 @@ async def test_invoke_agent_span_includes_response_model( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -3023,6 +3048,7 @@ def calculator(a: int, b: int) -> int: integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -3065,6 +3091,7 @@ def test_openai_agents_message_truncation(sentry_init, capture_items): integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) test_messages = [ @@ -3111,6 +3138,7 @@ async def test_streaming_span_update_captures_response_data( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) # Create a mock streaming response object (similar to what we'd get from ResponseCompletedEvent) @@ -3176,6 +3204,7 @@ async def test_streaming_ttft_on_chat_span( sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) request_headers = {} @@ -3330,6 +3359,7 @@ async def test_conversation_id_on_all_spans( sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, + 
_experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -3468,6 +3498,7 @@ def simple_tool(message: str) -> str: sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -3531,6 +3562,7 @@ async def test_no_conversation_id_when_not_provided( sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") diff --git a/tests/integrations/pydantic_ai/test_pydantic_ai.py b/tests/integrations/pydantic_ai/test_pydantic_ai.py index fe34dd0f5d..bab2f6208d 100644 --- a/tests/integrations/pydantic_ai/test_pydantic_ai.py +++ b/tests/integrations/pydantic_ai/test_pydantic_ai.py @@ -61,6 +61,7 @@ async def test_agent_run_async(sentry_init, capture_items, get_test_agent): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -102,6 +103,7 @@ async def test_agent_run_async_model_error(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event", "transaction", "span") @@ -135,6 +137,7 @@ async def test_agent_run_async_usage_data(sentry_init, capture_items, get_test_a integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -179,6 +182,7 @@ def test_agent_run_sync(sentry_init, capture_items, get_test_agent): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -211,6 +215,7 @@ def test_agent_run_sync_model_error(sentry_init, 
capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event", "transaction", "span") @@ -244,6 +249,7 @@ async def test_agent_run_stream(sentry_init, capture_items, get_test_agent): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -288,6 +294,7 @@ async def test_agent_run_stream_events(sentry_init, capture_items, get_test_agen integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -322,6 +329,7 @@ async def test_agent_with_tools(sentry_init, capture_items, get_test_agent): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) test_agent = get_test_agent() @@ -387,6 +395,7 @@ async def test_agent_with_tool_model_retry( ], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) retries = 0 @@ -470,6 +479,7 @@ async def test_agent_with_tool_validation_error( ], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) test_agent = get_test_agent() @@ -534,6 +544,7 @@ async def test_agent_with_tools_streaming(sentry_init, capture_items, get_test_a integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) test_agent = get_test_agent() @@ -583,6 +594,7 @@ async def test_model_settings(sentry_init, capture_items, get_test_agent_with_se sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -631,6 +643,7 @@ async def test_system_prompt_attribute( 
integrations=[PydanticAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -676,6 +689,7 @@ async def test_error_handling(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -700,6 +714,7 @@ async def test_without_pii(sentry_init, capture_items, get_test_agent): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=False, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -729,6 +744,7 @@ async def test_without_pii_tools(sentry_init, capture_items, get_test_agent): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=False, + _experiments={"gen_ai_as_v2_spans": True}, ) test_agent = get_test_agent() @@ -765,6 +781,7 @@ async def test_multiple_agents_concurrent(sentry_init, capture_items, get_test_a sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -799,6 +816,7 @@ async def test_message_history(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -848,6 +866,7 @@ async def test_gen_ai_system(sentry_init, capture_items, get_test_agent): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -878,6 +897,7 @@ async def test_include_prompts_false(sentry_init, capture_items, get_test_agent) integrations=[PydanticAIIntegration(include_prompts=False)], traces_sample_rate=1.0, 
send_default_pii=True, # Even with PII enabled, prompts should not be captured + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -907,6 +927,7 @@ async def test_include_prompts_true(sentry_init, capture_items, get_test_agent): integrations=[PydanticAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -938,6 +959,7 @@ async def test_include_prompts_false_with_tools( integrations=[PydanticAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) test_agent = get_test_agent() @@ -975,6 +997,7 @@ async def test_include_prompts_requires_pii(sentry_init, capture_items, get_test integrations=[PydanticAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=False, # PII disabled + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1066,6 +1089,7 @@ async def mock_map_tool_result_part(part): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1135,6 +1159,7 @@ async def test_context_cleanup_after_run(sentry_init, get_test_agent): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) # Verify context is not set before run @@ -1158,6 +1183,7 @@ def test_context_cleanup_after_run_sync(sentry_init, get_test_agent): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) # Verify context is not set before run @@ -1182,6 +1208,7 @@ async def test_context_cleanup_after_streaming(sentry_init, get_test_agent): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + 
_experiments={"gen_ai_as_v2_spans": True}, ) # Verify context is not set before run @@ -1208,6 +1235,7 @@ async def test_context_cleanup_on_error(sentry_init, get_test_agent): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) test_agent = get_test_agent() @@ -1242,6 +1270,7 @@ async def test_context_isolation_concurrent_agents(sentry_init, get_test_agent): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) # Create a second agent @@ -1297,6 +1326,7 @@ async def test_invoke_agent_with_list_user_prompt(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1347,6 +1377,7 @@ async def test_invoke_agent_with_instructions( integrations=[PydanticAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1386,6 +1417,7 @@ async def test_model_name_extraction_with_callable(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) # Test the utility function directly @@ -1412,6 +1444,7 @@ async def test_model_name_extraction_fallback_to_str(sentry_init, capture_items) sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) # Test the utility function directly @@ -1440,6 +1473,7 @@ async def test_model_settings_object_style(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -1475,6 +1509,7 @@ async 
def test_usage_data_partial(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1508,6 +1543,7 @@ async def test_agent_data_from_scope(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1530,6 +1566,7 @@ async def test_available_tools_without_description( sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) test_agent = get_test_agent() @@ -1564,6 +1601,7 @@ async def test_output_with_tool_calls(sentry_init, capture_items, get_test_agent integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) test_agent = get_test_agent() @@ -1609,6 +1647,7 @@ async def test_message_formatting_with_different_parts(sentry_init, capture_item integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1657,6 +1696,7 @@ async def test_update_invoke_agent_span_with_none_output(sentry_init, capture_it integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -1684,6 +1724,7 @@ async def test_update_ai_client_span_with_none_response(sentry_init, capture_ite sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -1709,6 +1750,7 @@ async def test_agent_without_name(sentry_init, capture_items): sentry_init( 
integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1734,6 +1776,7 @@ async def test_model_response_without_parts(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -1764,6 +1807,7 @@ async def test_input_messages_error_handling(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -1793,6 +1837,7 @@ async def test_available_tools_error_handling(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -1822,6 +1867,7 @@ async def test_set_usage_data_with_none_usage(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -1848,6 +1894,7 @@ async def test_set_usage_data_with_partial_fields(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -1889,6 +1936,7 @@ def test_tool(x: int) -> int: integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1918,6 +1966,7 @@ async def test_message_parts_with_list_content(sentry_init, capture_items): 
integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -1954,6 +2003,7 @@ async def test_output_data_with_text_and_tool_calls(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -1993,6 +2043,7 @@ async def test_output_data_error_handling(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2025,6 +2076,7 @@ async def test_message_with_system_prompt_part(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2060,6 +2112,7 @@ async def test_message_with_instructions(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2094,6 +2147,7 @@ async def test_set_input_messages_without_prompts(sentry_init, capture_items): integrations=[PydanticAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2122,6 +2176,7 @@ async def test_set_output_data_without_prompts(sentry_init, capture_items): integrations=[PydanticAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": 
True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2149,6 +2204,7 @@ async def test_get_model_name_with_exception_in_callable(sentry_init, capture_it sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) # Create model with callable name that raises exception @@ -2172,6 +2228,7 @@ async def test_get_model_name_with_string_model(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) # Pass a string as model @@ -2191,6 +2248,7 @@ async def test_get_model_name_with_none(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) # Pass None @@ -2212,6 +2270,7 @@ async def test_set_model_data_with_system(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2243,6 +2302,7 @@ async def test_set_model_data_from_agent_scope(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2276,6 +2336,7 @@ async def test_set_model_data_with_none_settings_values(sentry_init, capture_ite sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2308,6 +2369,7 @@ async def test_should_send_prompts_without_pii(sentry_init, capture_items): integrations=[PydanticAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=False, # PII disabled + _experiments={"gen_ai_as_v2_spans": True}, ) # 
Should return False @@ -2326,6 +2388,7 @@ async def test_set_agent_data_without_agent(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2352,6 +2415,7 @@ async def test_set_agent_data_from_scope(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2384,6 +2448,7 @@ async def test_set_agent_data_without_name(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2414,6 +2479,7 @@ async def test_set_available_tools_without_toolset(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2444,6 +2510,7 @@ async def test_set_available_tools_with_schema(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2483,6 +2550,7 @@ async def test_execute_tool_span_creation(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2509,6 +2577,7 @@ async def test_execute_tool_span_with_mcp_type(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + 
_experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2536,6 +2605,7 @@ async def test_execute_tool_span_without_prompts(sentry_init, capture_items): integrations=[PydanticAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2560,6 +2630,7 @@ async def test_execute_tool_span_with_none_args(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2583,6 +2654,7 @@ async def test_update_execute_tool_span_with_none_span(sentry_init, capture_item sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) # Update with None span - should not raise @@ -2607,6 +2679,7 @@ async def test_update_execute_tool_span_with_none_result(sentry_init, capture_it integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2630,6 +2703,7 @@ async def test_tool_execution_without_span_context(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) # Create a simple agent with no tools (won't have function_toolset) @@ -2661,6 +2735,7 @@ async def test_invoke_agent_span_with_callable_instruction(sentry_init, capture_ integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2694,6 +2769,7 @@ async def 
test_invoke_agent_span_with_string_instructions(sentry_init, capture_i integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2722,6 +2798,7 @@ async def test_ai_client_span_with_streaming_flag(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2749,6 +2826,7 @@ async def test_ai_client_span_gets_agent_from_scope(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2797,6 +2875,7 @@ async def test_binary_content_encoding_image(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2827,6 +2906,7 @@ async def test_binary_content_encoding_mixed_content(sentry_init, capture_items) integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2870,6 +2950,7 @@ async def test_binary_content_in_agent_run(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2894,6 +2975,7 @@ async def test_binary_content_in_agent_run(sentry_init, capture_items): async def test_set_usage_data_with_cache_tokens(sentry_init, capture_items): """Test that cache_read_tokens and cache_write_tokens are tracked.""" 
sentry_init(integrations=[PydanticAIIntegration()], traces_sample_rate=1.0) + _experiments = ({"gen_ai_as_v2_spans": True},) items = capture_items("transaction", "span") @@ -2964,6 +3046,7 @@ def test_image_url_base64_content_in_span( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -3034,6 +3117,7 @@ async def test_invoke_agent_image_url( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) agent = Agent("test", name="test_image_url_agent") @@ -3081,6 +3165,7 @@ def multiply_numbers(a: int, b: int) -> int: integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") From 51a07fff893c5c552de1950239b4a064dc48b828 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 17 Apr 2026 14:47:07 +0200 Subject: [PATCH 30/84] push experiment --- sentry_sdk/consts.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sentry_sdk/consts.py b/sentry_sdk/consts.py index 73e5a6d9cb..82107b49ee 100644 --- a/sentry_sdk/consts.py +++ b/sentry_sdk/consts.py @@ -86,6 +86,7 @@ class CompressionAlgo(Enum): "trace_lifecycle": Optional[Literal["static", "stream"]], "ignore_spans": Optional[IgnoreSpansConfig], "suppress_asgi_chained_exceptions": Optional[bool], + "gen_ai_as_v2_spans": Optional[bool], }, total=False, ) From bab75670df741b84c3b17b8b615786705abdbabc Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 17 Apr 2026 14:52:13 +0200 Subject: [PATCH 31/84] fix tests --- tests/tracing/test_decorator.py | 16 +++++++++++++--- tests/tracing/test_misc.py | 5 ++++- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/tests/tracing/test_decorator.py b/tests/tracing/test_decorator.py index 5f5adec2cb..d370b4bbc9 100644 --- 
a/tests/tracing/test_decorator.py +++ b/tests/tracing/test_decorator.py @@ -122,7 +122,10 @@ async def _some_function_traced(a, b, c): def test_span_templates_ai_dicts(sentry_init, capture_items): - sentry_init(traces_sample_rate=1.0) + sentry_init( + traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, + ) items = capture_items("span") @sentry_sdk.trace(template=SPANTEMPLATE.AI_TOOL) @@ -241,7 +244,10 @@ def my_agent(): def test_span_templates_ai_objects(sentry_init, capture_items): - sentry_init(traces_sample_rate=1.0) + sentry_init( + traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, + ) items = capture_items("span") @sentry_sdk.trace(template=SPANTEMPLATE.AI_TOOL) @@ -365,7 +371,11 @@ def my_agent(): @pytest.mark.parametrize("send_default_pii", [True, False]) def test_span_templates_ai_pii(sentry_init, capture_items, send_default_pii): - sentry_init(traces_sample_rate=1.0, send_default_pii=send_default_pii) + sentry_init( + traces_sample_rate=1.0, + send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, + ) items = capture_items("span") @sentry_sdk.trace(template=SPANTEMPLATE.AI_TOOL) diff --git a/tests/tracing/test_misc.py b/tests/tracing/test_misc.py index 0188b08a88..4209a02b4b 100644 --- a/tests/tracing/test_misc.py +++ b/tests/tracing/test_misc.py @@ -650,7 +650,10 @@ def test_conversation_id_propagates_to_span_with_gen_ai_op( self, sentry_init, capture_items ): """Span with gen_ai.* op should get conversation_id.""" - sentry_init(traces_sample_rate=1.0) + sentry_init( + traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, + ) items = capture_items("span") scope = sentry_sdk.get_current_scope() From 3e5579506264719625225e62271ab612c57afdc8 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 17 Apr 2026 14:53:49 +0200 Subject: [PATCH 32/84] client changes --- sentry_sdk/client.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git 
a/sentry_sdk/client.py b/sentry_sdk/client.py index 0d13b6db03..f8bc071545 100644 --- a/sentry_sdk/client.py +++ b/sentry_sdk/client.py @@ -1113,10 +1113,14 @@ def capture_event( envelope = Envelope(headers=headers) - if is_transaction: + if is_transaction and not self.options["_experiments"].get( + "gen_ai_as_v2_spans", False + ): if isinstance(profile, Profile): envelope.add_profile(profile.to_json(event_opt, self.options)) + envelope.add_transaction(event_opt) + elif is_transaction: split_spans = _split_gen_ai_spans(event_opt) if split_spans is None or not split_spans[1]: envelope.add_transaction(event_opt) From 6d1d7edce94a5c20be9d32470ca1a385c0d199be Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 17 Apr 2026 14:55:22 +0200 Subject: [PATCH 33/84] simplify client logic --- sentry_sdk/client.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sentry_sdk/client.py b/sentry_sdk/client.py index f8bc071545..87504c94b1 100644 --- a/sentry_sdk/client.py +++ b/sentry_sdk/client.py @@ -1113,12 +1113,12 @@ def capture_event( envelope = Envelope(headers=headers) + if is_transaction and isinstance(profile, Profile): + envelope.add_profile(profile.to_json(event_opt, self.options)) + if is_transaction and not self.options["_experiments"].get( "gen_ai_as_v2_spans", False ): - if isinstance(profile, Profile): - envelope.add_profile(profile.to_json(event_opt, self.options)) - envelope.add_transaction(event_opt) elif is_transaction: split_spans = _split_gen_ai_spans(event_opt) From 6bf400680527c779dc13421df181daab2fb09e7e Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 17 Apr 2026 15:07:42 +0200 Subject: [PATCH 34/84] Revert "add experimental v2 option" This reverts commit 6c5c812faa8879523fb4f90c650327a7f70a1d81. 
--- .../integrations/anthropic/test_anthropic.py | 53 ------------ .../google_genai/test_google_genai.py | 37 -------- .../huggingface_hub/test_huggingface_hub.py | 8 -- .../integrations/langchain/test_langchain.py | 26 ------ tests/integrations/litellm/test_litellm.py | 28 ------ tests/integrations/openai/test_openai.py | 42 --------- .../openai_agents/test_openai_agents.py | 32 ------- .../pydantic_ai/test_pydantic_ai.py | 85 ------------------- 8 files changed, 311 deletions(-) diff --git a/tests/integrations/anthropic/test_anthropic.py b/tests/integrations/anthropic/test_anthropic.py index aedab1578b..c7fc280b6c 100644 --- a/tests/integrations/anthropic/test_anthropic.py +++ b/tests/integrations/anthropic/test_anthropic.py @@ -97,7 +97,6 @@ def test_nonstreaming_create_message( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -172,7 +171,6 @@ async def test_nonstreaming_create_message_async( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = AsyncAnthropic(api_key="z") @@ -289,7 +287,6 @@ def test_streaming_create_message( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -398,7 +395,6 @@ def test_streaming_create_message_close( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -502,7 +498,6 @@ def test_streaming_create_message_api_error( 
integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -619,7 +614,6 @@ def test_stream_messages( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -729,7 +723,6 @@ def test_stream_messages_close( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -838,7 +831,6 @@ def test_stream_messages_api_error( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -961,7 +953,6 @@ async def test_streaming_create_message_async( traces_sample_rate=1.0, default_integrations=False, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1073,7 +1064,6 @@ async def test_streaming_create_message_async_close( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1180,7 +1170,6 @@ async def test_streaming_create_message_async_api_error( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1301,7 +1290,6 @@ async def test_stream_message_async( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) 
items = capture_items("transaction", "span") @@ -1412,7 +1400,6 @@ async def test_stream_messages_async_api_error( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1523,7 +1510,6 @@ async def test_stream_messages_async_close( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1680,7 +1666,6 @@ def test_streaming_create_message_with_input_json_delta( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1830,7 +1815,6 @@ def test_stream_messages_with_input_json_delta( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1988,7 +1972,6 @@ async def test_streaming_create_message_with_input_json_delta_async( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2146,7 +2129,6 @@ async def test_stream_message_with_input_json_delta_async( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2206,7 +2188,6 @@ async def test_stream_message_with_input_json_delta_async( def test_exception_message_create(sentry_init, capture_items): sentry_init(integrations=[AnthropicIntegration()], 
traces_sample_rate=1.0) - _experiments = ({"gen_ai_as_v2_spans": True},) items = capture_items("event", "transaction") client = Anthropic(api_key="z") @@ -2229,7 +2210,6 @@ def test_exception_message_create(sentry_init, capture_items): def test_span_status_error(sentry_init, capture_items): sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) - _experiments = ({"gen_ai_as_v2_spans": True},) items = capture_items("event", "span") with start_transaction(name="anthropic"): @@ -2256,7 +2236,6 @@ def test_span_status_error(sentry_init, capture_items): @pytest.mark.asyncio async def test_span_status_error_async(sentry_init, capture_items): sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) - _experiments = ({"gen_ai_as_v2_spans": True},) items = capture_items("event", "span") with start_transaction(name="anthropic"): @@ -2283,7 +2262,6 @@ async def test_span_status_error_async(sentry_init, capture_items): @pytest.mark.asyncio async def test_exception_message_create_async(sentry_init, capture_items): sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) - _experiments = ({"gen_ai_as_v2_spans": True},) items = capture_items("event", "transaction") client = AsyncAnthropic(api_key="z") @@ -2308,7 +2286,6 @@ def test_span_origin(sentry_init, capture_items): sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2339,7 +2316,6 @@ async def test_span_origin_async(sentry_init, capture_items): sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2403,7 +2379,6 @@ def test_set_output_data_with_input_json_delta(sentry_init): integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) with 
start_transaction(name="test"): @@ -2454,7 +2429,6 @@ def test_anthropic_message_role_mapping( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2501,7 +2475,6 @@ def test_anthropic_message_truncation(sentry_init, capture_items): integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2552,7 +2525,6 @@ async def test_anthropic_message_truncation_async(sentry_init, capture_items): integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2613,7 +2585,6 @@ def test_nonstreaming_create_message_with_system_prompt( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -2700,7 +2671,6 @@ async def test_nonstreaming_create_message_with_system_prompt_async( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = AsyncAnthropic(api_key="z") @@ -2830,7 +2800,6 @@ def test_streaming_create_message_with_system_prompt( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2961,7 +2930,6 @@ def test_stream_messages_with_system_prompt( integrations=[AnthropicIntegration(include_prompts=include_prompts)], 
traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -3094,7 +3062,6 @@ async def test_stream_message_with_system_prompt_async( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -3227,7 +3194,6 @@ async def test_streaming_create_message_with_system_prompt_async( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -3303,7 +3269,6 @@ def test_system_prompt_with_complex_structure(sentry_init, capture_items): integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -3557,7 +3522,6 @@ def test_message_with_base64_image(sentry_init, capture_items): integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -3608,7 +3572,6 @@ def test_message_with_url_image(sentry_init, capture_items): integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -3652,7 +3615,6 @@ def test_message_with_file_image(sentry_init, capture_items): integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") 
client = Anthropic(api_key="z") @@ -3697,7 +3659,6 @@ def test_message_with_base64_pdf(sentry_init, capture_items): integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -3742,7 +3703,6 @@ def test_message_with_url_pdf(sentry_init, capture_items): integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -3786,7 +3746,6 @@ def test_message_with_file_document(sentry_init, capture_items): integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -3831,7 +3790,6 @@ def test_message_with_mixed_content(sentry_init, capture_items): integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -3914,7 +3872,6 @@ def test_message_with_multiple_images_different_formats(sentry_init, capture_ite integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -3989,7 +3946,6 @@ def test_binary_content_not_stored_when_pii_disabled(sentry_init, capture_items) integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=False, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -4028,7 +3984,6 @@ def 
test_binary_content_not_stored_when_prompts_disabled(sentry_init, capture_it integrations=[AnthropicIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -4064,7 +4019,6 @@ def test_binary_content_not_stored_when_prompts_disabled(sentry_init, capture_it def test_cache_tokens_nonstreaming(sentry_init, capture_items): """Test cache read/write tokens are tracked for non-streaming responses.""" sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) - _experiments = ({"gen_ai_as_v2_spans": True},) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -4113,7 +4067,6 @@ def test_input_tokens_include_cache_write_nonstreaming(sentry_init, capture_item cache_creation_input_tokens=2846, cache_read_input_tokens=0) """ sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) - _experiments = ({"gen_ai_as_v2_spans": True},) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -4162,7 +4115,6 @@ def test_input_tokens_include_cache_read_nonstreaming(sentry_init, capture_items cache_creation_input_tokens=0, cache_read_input_tokens=2846) """ sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) - _experiments = ({"gen_ai_as_v2_spans": True},) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -4240,7 +4192,6 @@ def test_input_tokens_include_cache_read_streaming( ) sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) - _experiments = ({"gen_ai_as_v2_spans": True},) items = capture_items("transaction", "span") with mock.patch.object( @@ -4307,7 +4258,6 @@ def test_stream_messages_input_tokens_include_cache_read_streaming( ) sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) - _experiments = ({"gen_ai_as_v2_spans": True},) items = 
capture_items("transaction", "span") with mock.patch.object( @@ -4341,7 +4291,6 @@ def test_input_tokens_unchanged_without_caching(sentry_init, capture_items): Usage(input_tokens=20, output_tokens=12) """ sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) - _experiments = ({"gen_ai_as_v2_spans": True},) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -4410,7 +4359,6 @@ def test_cache_tokens_streaming( ) sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) - _experiments = ({"gen_ai_as_v2_spans": True},) items = capture_items("transaction", "span") with mock.patch.object( @@ -4471,7 +4419,6 @@ def test_stream_messages_cache_tokens( ) sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) - _experiments = ({"gen_ai_as_v2_spans": True},) items = capture_items("transaction", "span") with mock.patch.object( diff --git a/tests/integrations/google_genai/test_google_genai.py b/tests/integrations/google_genai/test_google_genai.py index ae31fe565b..e074b79c8c 100644 --- a/tests/integrations/google_genai/test_google_genai.py +++ b/tests/integrations/google_genai/test_google_genai.py @@ -130,7 +130,6 @@ def test_nonstreaming_generate_content( integrations=[GoogleGenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -220,7 +219,6 @@ def test_generate_content_with_system_instruction( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -264,7 +262,6 @@ def test_generate_content_with_tools(sentry_init, capture_items, mock_genai_clie sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -347,7 +344,6 
@@ def test_tool_execution(sentry_init, capture_items): integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -384,7 +380,6 @@ def test_error_handling(sentry_init, capture_items, mock_genai_client): sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event", "transaction") @@ -416,7 +411,6 @@ def test_streaming_generate_content(sentry_init, capture_items, mock_genai_clien integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -533,7 +527,6 @@ def test_span_origin(sentry_init, capture_items, mock_genai_client): sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -561,7 +554,6 @@ def test_response_without_usage_metadata(sentry_init, capture_items, mock_genai_ sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -603,7 +595,6 @@ def test_multiple_candidates(sentry_init, capture_items, mock_genai_client): integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -668,7 +659,6 @@ def test_all_configuration_parameters(sentry_init, capture_items, mock_genai_cli sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -708,7 +698,6 @@ def test_empty_response(sentry_init, capture_items, mock_genai_client): sentry_init( integrations=[GoogleGenAIIntegration()], 
traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -740,7 +729,6 @@ def test_response_with_different_id_fields( sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -783,7 +771,6 @@ def test_tool_with_async_function(sentry_init): sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) # Create an async tool function @@ -806,7 +793,6 @@ def test_contents_as_none(sentry_init, capture_items, mock_genai_client): integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -833,7 +819,6 @@ def test_tool_calls_extraction(sentry_init, capture_items, mock_genai_client): sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -920,7 +905,6 @@ def test_google_genai_message_truncation(sentry_init, capture_items, mock_genai_ integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -996,7 +980,6 @@ def test_embed_content( integrations=[GoogleGenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1058,7 +1041,6 @@ def test_embed_content_string_input(sentry_init, capture_items, mock_genai_clien integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1105,7 +1087,6 @@ def test_embed_content_error_handling(sentry_init, 
capture_items, mock_genai_cli sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "event") @@ -1139,7 +1120,6 @@ def test_embed_content_without_statistics( sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1179,7 +1159,6 @@ def test_embed_content_span_origin(sentry_init, capture_items, mock_genai_client sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1220,7 +1199,6 @@ async def test_async_embed_content( integrations=[GoogleGenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1285,7 +1263,6 @@ async def test_async_embed_content_string_input( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1335,7 +1312,6 @@ async def test_async_embed_content_error_handling( sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "event") @@ -1370,7 +1346,6 @@ async def test_async_embed_content_without_statistics( sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1413,7 +1388,6 @@ async def test_async_embed_content_span_origin( sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1445,7 +1419,6 @@ def 
test_generate_content_with_content_object( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1482,7 +1455,6 @@ def test_generate_content_with_dict_format( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1515,7 +1487,6 @@ def test_generate_content_with_file_data(sentry_init, capture_items, mock_genai_ integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1565,7 +1536,6 @@ def test_generate_content_with_inline_data( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1611,7 +1581,6 @@ def test_generate_content_with_function_response( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1666,7 +1635,6 @@ def test_generate_content_with_mixed_string_and_content( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1710,7 +1678,6 @@ def test_generate_content_with_part_object_directly( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1749,7 +1716,6 @@ def test_generate_content_with_list_of_dicts( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - 
_experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1786,7 +1752,6 @@ def test_generate_content_with_dict_inline_data( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1831,7 +1796,6 @@ def test_generate_content_without_parts_property_inline_data( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1875,7 +1839,6 @@ def test_generate_content_without_parts_property_inline_data_and_binary_data_wit integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") diff --git a/tests/integrations/huggingface_hub/test_huggingface_hub.py b/tests/integrations/huggingface_hub/test_huggingface_hub.py index 16c27b678d..98abbb00fa 100644 --- a/tests/integrations/huggingface_hub/test_huggingface_hub.py +++ b/tests/integrations/huggingface_hub/test_huggingface_hub.py @@ -480,7 +480,6 @@ def test_text_generation( traces_sample_rate=1.0, send_default_pii=send_default_pii, integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -556,7 +555,6 @@ def test_text_generation_streaming( traces_sample_rate=1.0, send_default_pii=send_default_pii, integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -633,7 +631,6 @@ def test_chat_completion( traces_sample_rate=1.0, send_default_pii=send_default_pii, integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], - _experiments={"gen_ai_as_v2_spans": True}, ) items = 
capture_items("transaction", "span") @@ -712,7 +709,6 @@ def test_chat_completion_streaming( traces_sample_rate=1.0, send_default_pii=send_default_pii, integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -784,7 +780,6 @@ def test_chat_completion_api_error( sentry_init: "Any", capture_items: "Any", mock_hf_api_with_errors: "Any" ) -> None: sentry_init(traces_sample_rate=1.0) - _experiments = ({"gen_ai_as_v2_spans": True},) items = capture_items("event", "transaction", "span") client = get_hf_provider_inference_client() @@ -844,7 +839,6 @@ def test_span_status_error( sentry_init: "Any", capture_items: "Any", mock_hf_api_with_errors: "Any" ) -> None: sentry_init(traces_sample_rate=1.0) - _experiments = ({"gen_ai_as_v2_spans": True},) items = capture_items("event", "transaction", "span") client = get_hf_provider_inference_client() @@ -887,7 +881,6 @@ def test_chat_completion_with_tools( traces_sample_rate=1.0, send_default_pii=send_default_pii, integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -983,7 +976,6 @@ def test_chat_completion_streaming_with_tools( traces_sample_rate=1.0, send_default_pii=send_default_pii, integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index 5002d050b9..f709d12129 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -108,7 +108,6 @@ def test_langchain_text_completion( ], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -217,7 +216,6 
@@ def test_langchain_create_agent( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -338,7 +336,6 @@ def test_tool_execution_span( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -531,7 +528,6 @@ def test_langchain_openai_tools_agent( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -869,7 +865,6 @@ def test_langchain_error(sentry_init, capture_items): integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event", "transaction", "span") @@ -908,7 +903,6 @@ def test_span_status_error(sentry_init, capture_items): sentry_init( integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event", "transaction", "span") @@ -994,7 +988,6 @@ def _identifying_params(self): return {} sentry_init(integrations=[LangchainIntegration()]) - _experiments = ({"gen_ai_as_v2_spans": True},) # Create a manual SentryLangchainCallback manual_callback = SentryLangchainCallback( @@ -1035,7 +1028,6 @@ def test_langchain_callback_manager(sentry_init): sentry_init( integrations=[LangchainIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) local_manager = BaseCallbackManager(handlers=[]) @@ -1068,7 +1060,6 @@ def test_langchain_callback_manager_with_sentry_callback(sentry_init): sentry_init( integrations=[LangchainIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) sentry_callback = SentryLangchainCallback(0, False) local_manager = BaseCallbackManager(handlers=[sentry_callback]) @@ -1101,7 
+1092,6 @@ def test_langchain_callback_list(sentry_init): sentry_init( integrations=[LangchainIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) local_callbacks = [] @@ -1134,7 +1124,6 @@ def test_langchain_callback_list_existing_callback(sentry_init): sentry_init( integrations=[LangchainIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) sentry_callback = SentryLangchainCallback(0, False) local_callbacks = [sentry_callback] @@ -1172,7 +1161,6 @@ def test_langchain_message_role_mapping(sentry_init, capture_items): integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1310,7 +1298,6 @@ def test_langchain_message_truncation(sentry_init, capture_items): integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1403,7 +1390,6 @@ def test_langchain_embeddings_sync( integrations=[LangchainIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1482,7 +1468,6 @@ def test_langchain_embeddings_embed_query( integrations=[LangchainIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1557,7 +1542,6 @@ async def test_langchain_embeddings_async( integrations=[LangchainIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1630,7 +1614,6 @@ async def 
test_langchain_embeddings_aembed_query(sentry_init, capture_items): integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1692,7 +1675,6 @@ def test_langchain_embeddings_no_model_name(sentry_init, capture_items): sentry_init( integrations=[LangchainIntegration(include_prompts=False)], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1742,7 +1724,6 @@ def test_langchain_embeddings_integration_disabled(sentry_init, capture_items): # Initialize without LangchainIntegration sentry_init(traces_sample_rate=1.0) - _experiments = ({"gen_ai_as_v2_spans": True},) items = capture_items("transaction", "span") with mock.patch.object( @@ -1779,7 +1760,6 @@ def test_langchain_embeddings_multiple_providers(sentry_init, capture_items): integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1837,7 +1817,6 @@ def test_langchain_embeddings_error_handling(sentry_init, capture_items): integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1878,7 +1857,6 @@ def test_langchain_embeddings_multiple_calls(sentry_init, capture_items): integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1942,7 +1920,6 @@ def test_langchain_embeddings_span_hierarchy(sentry_init, capture_items): integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = 
capture_items("transaction", "span") @@ -1996,7 +1973,6 @@ def test_langchain_embeddings_with_list_and_string_inputs(sentry_init, capture_i integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2061,7 +2037,6 @@ def test_langchain_response_model_extraction( integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2367,7 +2342,6 @@ def test_langchain_ai_system_detection( sentry_init( integrations=[LangchainIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") diff --git a/tests/integrations/litellm/test_litellm.py b/tests/integrations/litellm/test_litellm.py index b9365e7008..90807744e7 100644 --- a/tests/integrations/litellm/test_litellm.py +++ b/tests/integrations/litellm/test_litellm.py @@ -152,7 +152,6 @@ def test_nonstreaming_chat_completion( integrations=[LiteLLMIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -234,7 +233,6 @@ async def test_async_nonstreaming_chat_completion( integrations=[LiteLLMIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -318,7 +316,6 @@ def test_streaming_chat_completion( integrations=[LiteLLMIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -389,7 +386,6 @@ async def test_async_streaming_chat_completion( 
integrations=[LiteLLMIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -456,7 +452,6 @@ def test_embeddings_create( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -526,7 +521,6 @@ async def test_async_embeddings_create( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -591,7 +585,6 @@ def test_embeddings_create_with_list_input( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -654,7 +647,6 @@ async def test_async_embeddings_create_with_list_input( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -717,7 +709,6 @@ def test_embeddings_no_pii( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=False, # PII disabled - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -774,7 +765,6 @@ async def test_async_embeddings_no_pii( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=False, # PII disabled - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -825,7 +815,6 @@ def test_exception_handling( sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event") @@ -864,7 
+853,6 @@ async def test_async_exception_handling( sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event") @@ -906,7 +894,6 @@ def test_span_origin( sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -954,7 +941,6 @@ def test_multiple_providers( sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction") @@ -1050,7 +1036,6 @@ async def test_async_multiple_providers( sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1147,7 +1132,6 @@ def test_additional_parameters( sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1207,7 +1191,6 @@ async def test_async_additional_parameters( sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1267,7 +1250,6 @@ def test_no_integration( """Test that when integration is not enabled, callbacks don't break.""" sentry_init( traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1314,7 +1296,6 @@ async def test_async_no_integration( """Test that when integration is not enabled, callbacks don't break.""" sentry_init( traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1357,7 +1338,6 @@ def test_response_without_usage(sentry_init, capture_items): sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, - 
_experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1399,7 +1379,6 @@ def test_integration_setup(sentry_init): sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) # Check that callbacks are registered @@ -1414,7 +1393,6 @@ def test_litellm_message_truncation(sentry_init, capture_items): integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1481,7 +1459,6 @@ def test_binary_content_encoding_image_url( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1561,7 +1538,6 @@ async def test_async_binary_content_encoding_image_url( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1642,7 +1618,6 @@ def test_binary_content_encoding_mixed_content( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1711,7 +1686,6 @@ async def test_async_binary_content_encoding_mixed_content( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1781,7 +1755,6 @@ def test_binary_content_encoding_uri_type( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1855,7 +1828,6 @@ async def 
test_async_binary_content_encoding_uri_type( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index 4c7df84b8b..e53f8e4f55 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -138,7 +138,6 @@ def test_nonstreaming_chat_completion_no_prompts( integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -234,7 +233,6 @@ def test_nonstreaming_chat_completion(sentry_init, capture_items, messages, requ integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -314,7 +312,6 @@ async def test_nonstreaming_chat_completion_async_no_prompts( integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -410,7 +407,6 @@ async def test_nonstreaming_chat_completion_async( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -506,7 +502,6 @@ def test_streaming_chat_completion_no_prompts( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -626,7 +621,6 @@ def test_streaming_chat_completion_with_usage_in_stream( integrations=[OpenAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=False, - _experiments={"gen_ai_as_v2_spans": True}, ) items 
= capture_items("span") @@ -707,7 +701,6 @@ def test_streaming_chat_completion_empty_content_preserves_token_usage( integrations=[OpenAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=False, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -771,7 +764,6 @@ async def test_streaming_chat_completion_empty_content_preserves_token_usage_asy integrations=[OpenAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=False, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -837,7 +829,6 @@ async def test_streaming_chat_completion_async_with_usage_in_stream( integrations=[OpenAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=False, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -966,7 +957,6 @@ def test_streaming_chat_completion( ], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1119,7 +1109,6 @@ async def test_streaming_chat_completion_async_no_prompts( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1291,7 +1280,6 @@ async def test_streaming_chat_completion_async( ], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1424,7 +1412,6 @@ async def test_streaming_chat_completion_async( def test_bad_chat_completion(sentry_init, capture_items): sentry_init(integrations=[OpenAIIntegration()], traces_sample_rate=1.0) - _experiments = ({"gen_ai_as_v2_spans": True},) items = capture_items("event") client = OpenAI(api_key="z") @@ -1443,7 +1430,6 @@ def test_bad_chat_completion(sentry_init, capture_items): def test_span_status_error(sentry_init, capture_items): sentry_init(integrations=[OpenAIIntegration()], traces_sample_rate=1.0) - _experiments = 
({"gen_ai_as_v2_spans": True},) items = capture_items("event", "transaction", "span") with start_transaction(name="test"): @@ -1469,7 +1455,6 @@ def test_span_status_error(sentry_init, capture_items): @pytest.mark.asyncio async def test_bad_chat_completion_async(sentry_init, capture_items): sentry_init(integrations=[OpenAIIntegration()], traces_sample_rate=1.0) - _experiments = ({"gen_ai_as_v2_spans": True},) items = capture_items("event") client = AsyncOpenAI(api_key="z") @@ -1500,7 +1485,6 @@ def test_embeddings_create_no_pii( integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1583,7 +1567,6 @@ def test_embeddings_create(sentry_init, capture_items, input, request): integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1655,7 +1638,6 @@ async def test_embeddings_create_async_no_pii( integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1739,7 +1721,6 @@ async def test_embeddings_create_async(sentry_init, capture_items, input, reques integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1808,7 +1789,6 @@ def test_embeddings_create_raises_error( integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event") @@ -1837,7 +1817,6 @@ async def test_embeddings_create_raises_error_async( integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, 
send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event") @@ -1858,7 +1837,6 @@ def test_span_origin_nonstreaming_chat(sentry_init, capture_items): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1882,7 +1860,6 @@ async def test_span_origin_nonstreaming_chat_async(sentry_init, capture_items): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1905,7 +1882,6 @@ def test_span_origin_streaming_chat(sentry_init, capture_items): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1969,7 +1945,6 @@ async def test_span_origin_streaming_chat_async( sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2036,7 +2011,6 @@ def test_span_origin_embeddings(sentry_init, capture_items): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2068,7 +2042,6 @@ async def test_span_origin_embeddings_async(sentry_init, capture_items): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2462,7 +2435,6 @@ def test_ai_client_span_responses_api_no_pii(sentry_init, capture_items): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -2585,7 +2557,6 @@ def test_ai_client_span_responses_api( integrations=[OpenAIIntegration(include_prompts=True)], 
traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -2796,7 +2767,6 @@ def test_error_in_responses_api(sentry_init, capture_items): integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event", "transaction", "span") @@ -2903,7 +2873,6 @@ async def test_ai_client_span_responses_async_api( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -3189,7 +3158,6 @@ async def test_ai_client_span_streaming_responses_async_api( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -3415,7 +3383,6 @@ async def test_error_in_responses_async_api(sentry_init, capture_items): integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event", "transaction", "span") @@ -3543,7 +3510,6 @@ def test_streaming_responses_api( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -3620,7 +3586,6 @@ async def test_streaming_responses_api_async( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -3684,7 +3649,6 @@ def test_empty_tools_in_chat_completion(sentry_init, capture_items, tools): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -3727,7 +3691,6 @@ def test_openai_message_role_mapping( integrations=[OpenAIIntegration(include_prompts=True)], 
traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -3758,7 +3721,6 @@ def test_openai_message_truncation(sentry_init, capture_items): integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -3808,7 +3770,6 @@ def test_streaming_chat_completion_ttft( sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -3887,7 +3848,6 @@ async def test_streaming_chat_completion_ttft_async( sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -3964,7 +3924,6 @@ def test_streaming_responses_api_ttft( sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -4014,7 +3973,6 @@ async def test_streaming_responses_api_ttft_async( sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py index 9e74848a04..294812b0ca 100644 --- a/tests/integrations/openai_agents/test_openai_agents.py +++ b/tests/integrations/openai_agents/test_openai_agents.py @@ -182,7 +182,6 @@ async def test_agent_invocation_span_no_pii( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=False, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -340,7 +339,6 @@ async def test_agent_invocation_span( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, 
) items = capture_items("span", "transaction") @@ -525,7 +523,6 @@ async def test_client_span_custom_model( sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -573,7 +570,6 @@ def test_agent_invocation_span_sync_no_pii( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=False, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -725,7 +721,6 @@ def test_agent_invocation_span_sync( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -968,7 +963,6 @@ async def test_handoff_span(sentry_init, capture_items, get_model_response): sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1098,7 +1092,6 @@ async def test_max_turns_before_handoff_span( sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1197,7 +1190,6 @@ def simple_test_tool(message: str) -> str: integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1426,7 +1418,6 @@ async def test_hosted_mcp_tool_propagation_header_streamed( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, release="d08ebdb9309e1b004c6f52202de58a09c2268e42", - _experiments={"gen_ai_as_v2_spans": True}, ) request_headers = {} @@ -1589,7 +1580,6 @@ async def test_hosted_mcp_tool_propagation_headers( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, release="d08ebdb9309e1b004c6f52202de58a09c2268e42", - _experiments={"gen_ai_as_v2_spans": 
True}, ) response = get_model_response(EXAMPLE_RESPONSE, serialize_pydantic=True) @@ -1688,7 +1678,6 @@ def simple_test_tool(message: str) -> str: integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -1737,7 +1726,6 @@ async def test_error_handling(sentry_init, capture_items, test_agent): LoggingIntegration(event_level=logging.CRITICAL), ], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event", "span", "transaction") @@ -1803,7 +1791,6 @@ async def test_error_captures_input_data(sentry_init, capture_items, test_agent) ], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event", "span") @@ -1848,7 +1835,6 @@ async def test_span_status_error(sentry_init, capture_items, test_agent): LoggingIntegration(event_level=logging.CRITICAL), ], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event", "transaction", "span") @@ -1962,7 +1948,6 @@ async def test_mcp_tool_execution_spans( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -2093,7 +2078,6 @@ async def test_mcp_tool_execution_with_error( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -2218,7 +2202,6 @@ async def test_mcp_tool_execution_without_pii( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=False, # PII disabled - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -2276,7 +2259,6 @@ async def test_multiple_agents_asyncio( sentry_init( integrations=[OpenAIAgentsIntegration()], 
traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -2320,7 +2302,6 @@ def test_openai_agents_message_role_mapping( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) get_response_kwargs = {"input": [test_message]} @@ -2420,7 +2401,6 @@ def failing_tool(message: str) -> str: integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -2518,7 +2498,6 @@ async def test_invoke_agent_span_includes_usage_data( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -2612,7 +2591,6 @@ async def test_ai_client_span_includes_response_model( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -2701,7 +2679,6 @@ async def test_ai_client_span_response_model_with_chat_completions( sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -2824,7 +2801,6 @@ def calculator(a: int, b: int) -> int: integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -2913,7 +2889,6 @@ async def test_invoke_agent_span_includes_response_model( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -3048,7 +3023,6 @@ def calculator(a: int, b: int) -> int: integrations=[OpenAIAgentsIntegration()], 
traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -3091,7 +3065,6 @@ def test_openai_agents_message_truncation(sentry_init, capture_items): integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) test_messages = [ @@ -3138,7 +3111,6 @@ async def test_streaming_span_update_captures_response_data( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) # Create a mock streaming response object (similar to what we'd get from ResponseCompletedEvent) @@ -3204,7 +3176,6 @@ async def test_streaming_ttft_on_chat_span( sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) request_headers = {} @@ -3359,7 +3330,6 @@ async def test_conversation_id_on_all_spans( sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -3498,7 +3468,6 @@ def simple_tool(message: str) -> str: sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -3562,7 +3531,6 @@ async def test_no_conversation_id_when_not_provided( sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") diff --git a/tests/integrations/pydantic_ai/test_pydantic_ai.py b/tests/integrations/pydantic_ai/test_pydantic_ai.py index bab2f6208d..fe34dd0f5d 100644 --- a/tests/integrations/pydantic_ai/test_pydantic_ai.py +++ b/tests/integrations/pydantic_ai/test_pydantic_ai.py @@ -61,7 +61,6 @@ async def test_agent_run_async(sentry_init, capture_items, get_test_agent): 
integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -103,7 +102,6 @@ async def test_agent_run_async_model_error(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event", "transaction", "span") @@ -137,7 +135,6 @@ async def test_agent_run_async_usage_data(sentry_init, capture_items, get_test_a integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -182,7 +179,6 @@ def test_agent_run_sync(sentry_init, capture_items, get_test_agent): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -215,7 +211,6 @@ def test_agent_run_sync_model_error(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event", "transaction", "span") @@ -249,7 +244,6 @@ async def test_agent_run_stream(sentry_init, capture_items, get_test_agent): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -294,7 +288,6 @@ async def test_agent_run_stream_events(sentry_init, capture_items, get_test_agen integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -329,7 +322,6 @@ async def test_agent_with_tools(sentry_init, capture_items, get_test_agent): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, 
- _experiments={"gen_ai_as_v2_spans": True}, ) test_agent = get_test_agent() @@ -395,7 +387,6 @@ async def test_agent_with_tool_model_retry( ], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) retries = 0 @@ -479,7 +470,6 @@ async def test_agent_with_tool_validation_error( ], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) test_agent = get_test_agent() @@ -544,7 +534,6 @@ async def test_agent_with_tools_streaming(sentry_init, capture_items, get_test_a integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) test_agent = get_test_agent() @@ -594,7 +583,6 @@ async def test_model_settings(sentry_init, capture_items, get_test_agent_with_se sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -643,7 +631,6 @@ async def test_system_prompt_attribute( integrations=[PydanticAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -689,7 +676,6 @@ async def test_error_handling(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -714,7 +700,6 @@ async def test_without_pii(sentry_init, capture_items, get_test_agent): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=False, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -744,7 +729,6 @@ async def test_without_pii_tools(sentry_init, capture_items, get_test_agent): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=False, - 
_experiments={"gen_ai_as_v2_spans": True}, ) test_agent = get_test_agent() @@ -781,7 +765,6 @@ async def test_multiple_agents_concurrent(sentry_init, capture_items, get_test_a sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -816,7 +799,6 @@ async def test_message_history(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -866,7 +848,6 @@ async def test_gen_ai_system(sentry_init, capture_items, get_test_agent): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -897,7 +878,6 @@ async def test_include_prompts_false(sentry_init, capture_items, get_test_agent) integrations=[PydanticAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=True, # Even with PII enabled, prompts should not be captured - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -927,7 +907,6 @@ async def test_include_prompts_true(sentry_init, capture_items, get_test_agent): integrations=[PydanticAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -959,7 +938,6 @@ async def test_include_prompts_false_with_tools( integrations=[PydanticAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) test_agent = get_test_agent() @@ -997,7 +975,6 @@ async def test_include_prompts_requires_pii(sentry_init, capture_items, get_test integrations=[PydanticAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=False, # PII 
disabled - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1089,7 +1066,6 @@ async def mock_map_tool_result_part(part): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1159,7 +1135,6 @@ async def test_context_cleanup_after_run(sentry_init, get_test_agent): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) # Verify context is not set before run @@ -1183,7 +1158,6 @@ def test_context_cleanup_after_run_sync(sentry_init, get_test_agent): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) # Verify context is not set before run @@ -1208,7 +1182,6 @@ async def test_context_cleanup_after_streaming(sentry_init, get_test_agent): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) # Verify context is not set before run @@ -1235,7 +1208,6 @@ async def test_context_cleanup_on_error(sentry_init, get_test_agent): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) test_agent = get_test_agent() @@ -1270,7 +1242,6 @@ async def test_context_isolation_concurrent_agents(sentry_init, get_test_agent): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) # Create a second agent @@ -1326,7 +1297,6 @@ async def test_invoke_agent_with_list_user_prompt(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1377,7 +1347,6 @@ async def test_invoke_agent_with_instructions( 
integrations=[PydanticAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1417,7 +1386,6 @@ async def test_model_name_extraction_with_callable(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) # Test the utility function directly @@ -1444,7 +1412,6 @@ async def test_model_name_extraction_fallback_to_str(sentry_init, capture_items) sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) # Test the utility function directly @@ -1473,7 +1440,6 @@ async def test_model_settings_object_style(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -1509,7 +1475,6 @@ async def test_usage_data_partial(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1543,7 +1508,6 @@ async def test_agent_data_from_scope(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1566,7 +1530,6 @@ async def test_available_tools_without_description( sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) test_agent = get_test_agent() @@ -1601,7 +1564,6 @@ async def test_output_with_tool_calls(sentry_init, capture_items, get_test_agent integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - 
_experiments={"gen_ai_as_v2_spans": True}, ) test_agent = get_test_agent() @@ -1647,7 +1609,6 @@ async def test_message_formatting_with_different_parts(sentry_init, capture_item integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1696,7 +1657,6 @@ async def test_update_invoke_agent_span_with_none_output(sentry_init, capture_it integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -1724,7 +1684,6 @@ async def test_update_ai_client_span_with_none_response(sentry_init, capture_ite sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -1750,7 +1709,6 @@ async def test_agent_without_name(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1776,7 +1734,6 @@ async def test_model_response_without_parts(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -1807,7 +1764,6 @@ async def test_input_messages_error_handling(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -1837,7 +1793,6 @@ async def test_available_tools_error_handling(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], 
traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -1867,7 +1822,6 @@ async def test_set_usage_data_with_none_usage(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -1894,7 +1848,6 @@ async def test_set_usage_data_with_partial_fields(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -1936,7 +1889,6 @@ def test_tool(x: int) -> int: integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1966,7 +1918,6 @@ async def test_message_parts_with_list_content(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2003,7 +1954,6 @@ async def test_output_data_with_text_and_tool_calls(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2043,7 +1993,6 @@ async def test_output_data_error_handling(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2076,7 +2025,6 @@ async def test_message_with_system_prompt_part(sentry_init, capture_items): 
integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2112,7 +2060,6 @@ async def test_message_with_instructions(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2147,7 +2094,6 @@ async def test_set_input_messages_without_prompts(sentry_init, capture_items): integrations=[PydanticAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2176,7 +2122,6 @@ async def test_set_output_data_without_prompts(sentry_init, capture_items): integrations=[PydanticAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2204,7 +2149,6 @@ async def test_get_model_name_with_exception_in_callable(sentry_init, capture_it sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) # Create model with callable name that raises exception @@ -2228,7 +2172,6 @@ async def test_get_model_name_with_string_model(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) # Pass a string as model @@ -2248,7 +2191,6 @@ async def test_get_model_name_with_none(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) # Pass None @@ -2270,7 +2212,6 @@ async def test_set_model_data_with_system(sentry_init, 
capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2302,7 +2243,6 @@ async def test_set_model_data_from_agent_scope(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2336,7 +2276,6 @@ async def test_set_model_data_with_none_settings_values(sentry_init, capture_ite sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2369,7 +2308,6 @@ async def test_should_send_prompts_without_pii(sentry_init, capture_items): integrations=[PydanticAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=False, # PII disabled - _experiments={"gen_ai_as_v2_spans": True}, ) # Should return False @@ -2388,7 +2326,6 @@ async def test_set_agent_data_without_agent(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2415,7 +2352,6 @@ async def test_set_agent_data_from_scope(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2448,7 +2384,6 @@ async def test_set_agent_data_without_name(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2479,7 +2414,6 @@ 
async def test_set_available_tools_without_toolset(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2510,7 +2444,6 @@ async def test_set_available_tools_with_schema(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2550,7 +2483,6 @@ async def test_execute_tool_span_creation(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2577,7 +2509,6 @@ async def test_execute_tool_span_with_mcp_type(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2605,7 +2536,6 @@ async def test_execute_tool_span_without_prompts(sentry_init, capture_items): integrations=[PydanticAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2630,7 +2560,6 @@ async def test_execute_tool_span_with_none_args(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2654,7 +2583,6 @@ async def test_update_execute_tool_span_with_none_span(sentry_init, capture_item sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - 
_experiments={"gen_ai_as_v2_spans": True}, ) # Update with None span - should not raise @@ -2679,7 +2607,6 @@ async def test_update_execute_tool_span_with_none_result(sentry_init, capture_it integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2703,7 +2630,6 @@ async def test_tool_execution_without_span_context(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) # Create a simple agent with no tools (won't have function_toolset) @@ -2735,7 +2661,6 @@ async def test_invoke_agent_span_with_callable_instruction(sentry_init, capture_ integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2769,7 +2694,6 @@ async def test_invoke_agent_span_with_string_instructions(sentry_init, capture_i integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2798,7 +2722,6 @@ async def test_ai_client_span_with_streaming_flag(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2826,7 +2749,6 @@ async def test_ai_client_span_gets_agent_from_scope(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2875,7 +2797,6 @@ async def test_binary_content_encoding_image(sentry_init, 
capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2906,7 +2827,6 @@ async def test_binary_content_encoding_mixed_content(sentry_init, capture_items) integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2950,7 +2870,6 @@ async def test_binary_content_in_agent_run(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2975,7 +2894,6 @@ async def test_binary_content_in_agent_run(sentry_init, capture_items): async def test_set_usage_data_with_cache_tokens(sentry_init, capture_items): """Test that cache_read_tokens and cache_write_tokens are tracked.""" sentry_init(integrations=[PydanticAIIntegration()], traces_sample_rate=1.0) - _experiments = ({"gen_ai_as_v2_spans": True},) items = capture_items("transaction", "span") @@ -3046,7 +2964,6 @@ def test_image_url_base64_content_in_span( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -3117,7 +3034,6 @@ async def test_invoke_agent_image_url( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) agent = Agent("test", name="test_image_url_agent") @@ -3165,7 +3081,6 @@ def multiply_numbers(a: int, b: int) -> int: integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") From 700e8a17934b20734797472a9270e054b8c1bb90 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb 
Date: Fri, 17 Apr 2026 15:09:05 +0200 Subject: [PATCH 35/84] retry adding experimental option to tests --- .../integrations/anthropic/test_anthropic.py | 113 ++++++++++++++++-- .../google_genai/test_google_genai.py | 37 ++++++ .../huggingface_hub/test_huggingface_hub.py | 10 +- .../integrations/langchain/test_langchain.py | 30 ++++- tests/integrations/litellm/test_litellm.py | 28 +++++ tests/integrations/openai/test_openai.py | 57 ++++++++- .../openai_agents/test_openai_agents.py | 32 +++++ .../pydantic_ai/test_pydantic_ai.py | 90 +++++++++++++- 8 files changed, 377 insertions(+), 20 deletions(-) diff --git a/tests/integrations/anthropic/test_anthropic.py b/tests/integrations/anthropic/test_anthropic.py index c7fc280b6c..b19cca9347 100644 --- a/tests/integrations/anthropic/test_anthropic.py +++ b/tests/integrations/anthropic/test_anthropic.py @@ -97,6 +97,7 @@ def test_nonstreaming_create_message( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -171,6 +172,7 @@ async def test_nonstreaming_create_message_async( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = AsyncAnthropic(api_key="z") @@ -287,6 +289,7 @@ def test_streaming_create_message( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -395,6 +398,7 @@ def test_streaming_create_message_close( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = 
capture_items("transaction", "span") @@ -498,6 +502,7 @@ def test_streaming_create_message_api_error( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -614,6 +619,7 @@ def test_stream_messages( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -723,6 +729,7 @@ def test_stream_messages_close( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -831,6 +838,7 @@ def test_stream_messages_api_error( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -953,6 +961,7 @@ async def test_streaming_create_message_async( traces_sample_rate=1.0, default_integrations=False, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1064,6 +1073,7 @@ async def test_streaming_create_message_async_close( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1170,6 +1180,7 @@ async def test_streaming_create_message_async_api_error( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1290,6 +1301,7 @@ async def test_stream_message_async( integrations=[AnthropicIntegration(include_prompts=include_prompts)], 
traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1400,6 +1412,7 @@ async def test_stream_messages_async_api_error( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1510,6 +1523,7 @@ async def test_stream_messages_async_close( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1666,6 +1680,7 @@ def test_streaming_create_message_with_input_json_delta( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1815,6 +1830,7 @@ def test_stream_messages_with_input_json_delta( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1972,6 +1988,7 @@ async def test_streaming_create_message_with_input_json_delta_async( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2129,6 +2146,7 @@ async def test_stream_message_with_input_json_delta_async( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2187,7 +2205,11 @@ async def test_stream_message_with_input_json_delta_async( def 
test_exception_message_create(sentry_init, capture_items): - sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) + sentry_init( + integrations=[AnthropicIntegration()], + traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, + ) items = capture_items("event", "transaction") client = Anthropic(api_key="z") @@ -2209,7 +2231,11 @@ def test_exception_message_create(sentry_init, capture_items): def test_span_status_error(sentry_init, capture_items): - sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) + sentry_init( + integrations=[AnthropicIntegration()], + traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, + ) items = capture_items("event", "span") with start_transaction(name="anthropic"): @@ -2235,7 +2261,11 @@ def test_span_status_error(sentry_init, capture_items): @pytest.mark.asyncio async def test_span_status_error_async(sentry_init, capture_items): - sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) + sentry_init( + integrations=[AnthropicIntegration()], + traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, + ) items = capture_items("event", "span") with start_transaction(name="anthropic"): @@ -2261,7 +2291,11 @@ async def test_span_status_error_async(sentry_init, capture_items): @pytest.mark.asyncio async def test_exception_message_create_async(sentry_init, capture_items): - sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) + sentry_init( + integrations=[AnthropicIntegration()], + traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, + ) items = capture_items("event", "transaction") client = AsyncAnthropic(api_key="z") @@ -2286,6 +2320,7 @@ def test_span_origin(sentry_init, capture_items): sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2316,6 +2351,7 @@ async def 
test_span_origin_async(sentry_init, capture_items): sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2379,6 +2415,7 @@ def test_set_output_data_with_input_json_delta(sentry_init): integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) with start_transaction(name="test"): @@ -2429,6 +2466,7 @@ def test_anthropic_message_role_mapping( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2475,6 +2513,7 @@ def test_anthropic_message_truncation(sentry_init, capture_items): integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2525,6 +2564,7 @@ async def test_anthropic_message_truncation_async(sentry_init, capture_items): integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2585,6 +2625,7 @@ def test_nonstreaming_create_message_with_system_prompt( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -2671,6 +2712,7 @@ async def test_nonstreaming_create_message_with_system_prompt_async( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = 
AsyncAnthropic(api_key="z") @@ -2800,6 +2842,7 @@ def test_streaming_create_message_with_system_prompt( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2930,6 +2973,7 @@ def test_stream_messages_with_system_prompt( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -3062,6 +3106,7 @@ async def test_stream_message_with_system_prompt_async( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -3194,6 +3239,7 @@ async def test_streaming_create_message_with_system_prompt_async( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -3269,6 +3315,7 @@ def test_system_prompt_with_complex_structure(sentry_init, capture_items): integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -3522,6 +3569,7 @@ def test_message_with_base64_image(sentry_init, capture_items): integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -3572,6 +3620,7 @@ def test_message_with_url_image(sentry_init, capture_items): 
integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -3615,6 +3664,7 @@ def test_message_with_file_image(sentry_init, capture_items): integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -3659,6 +3709,7 @@ def test_message_with_base64_pdf(sentry_init, capture_items): integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -3703,6 +3754,7 @@ def test_message_with_url_pdf(sentry_init, capture_items): integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -3746,6 +3798,7 @@ def test_message_with_file_document(sentry_init, capture_items): integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -3790,6 +3843,7 @@ def test_message_with_mixed_content(sentry_init, capture_items): integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -3872,6 +3926,7 @@ def test_message_with_multiple_images_different_formats(sentry_init, capture_ite integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, 
send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -3946,6 +4001,7 @@ def test_binary_content_not_stored_when_pii_disabled(sentry_init, capture_items) integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=False, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -3984,6 +4040,7 @@ def test_binary_content_not_stored_when_prompts_disabled(sentry_init, capture_it integrations=[AnthropicIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -4018,7 +4075,11 @@ def test_binary_content_not_stored_when_prompts_disabled(sentry_init, capture_it def test_cache_tokens_nonstreaming(sentry_init, capture_items): """Test cache read/write tokens are tracked for non-streaming responses.""" - sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) + sentry_init( + integrations=[AnthropicIntegration()], + traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, + ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -4066,7 +4127,11 @@ def test_input_tokens_include_cache_write_nonstreaming(sentry_init, capture_item Usage(input_tokens=19, output_tokens=14, cache_creation_input_tokens=2846, cache_read_input_tokens=0) """ - sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) + sentry_init( + integrations=[AnthropicIntegration()], + traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, + ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -4114,7 +4179,11 @@ def test_input_tokens_include_cache_read_nonstreaming(sentry_init, capture_items Usage(input_tokens=19, output_tokens=14, 
cache_creation_input_tokens=0, cache_read_input_tokens=2846) """ - sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) + sentry_init( + integrations=[AnthropicIntegration()], + traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, + ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -4191,7 +4260,11 @@ def test_input_tokens_include_cache_read_streaming( ) ) - sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) + sentry_init( + integrations=[AnthropicIntegration()], + traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, + ) items = capture_items("transaction", "span") with mock.patch.object( @@ -4257,7 +4330,11 @@ def test_stream_messages_input_tokens_include_cache_read_streaming( ) ) - sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) + sentry_init( + integrations=[AnthropicIntegration()], + traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, + ) items = capture_items("transaction", "span") with mock.patch.object( @@ -4290,7 +4367,11 @@ def test_input_tokens_unchanged_without_caching(sentry_init, capture_items): Real Anthropic response (from E2E test, simple call without caching): Usage(input_tokens=20, output_tokens=12) """ - sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) + sentry_init( + integrations=[AnthropicIntegration()], + traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, + ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -4358,7 +4439,11 @@ def test_cache_tokens_streaming( ) ) - sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) + sentry_init( + integrations=[AnthropicIntegration()], + traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, + ) items = capture_items("transaction", "span") with mock.patch.object( @@ -4418,7 +4503,11 @@ def test_stream_messages_cache_tokens( ) ) - 
sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) + sentry_init( + integrations=[AnthropicIntegration()], + traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, + ) items = capture_items("transaction", "span") with mock.patch.object( diff --git a/tests/integrations/google_genai/test_google_genai.py b/tests/integrations/google_genai/test_google_genai.py index e074b79c8c..ae31fe565b 100644 --- a/tests/integrations/google_genai/test_google_genai.py +++ b/tests/integrations/google_genai/test_google_genai.py @@ -130,6 +130,7 @@ def test_nonstreaming_generate_content( integrations=[GoogleGenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -219,6 +220,7 @@ def test_generate_content_with_system_instruction( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -262,6 +264,7 @@ def test_generate_content_with_tools(sentry_init, capture_items, mock_genai_clie sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -344,6 +347,7 @@ def test_tool_execution(sentry_init, capture_items): integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -380,6 +384,7 @@ def test_error_handling(sentry_init, capture_items, mock_genai_client): sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event", "transaction") @@ -411,6 +416,7 @@ def test_streaming_generate_content(sentry_init, capture_items, mock_genai_clien 
integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -527,6 +533,7 @@ def test_span_origin(sentry_init, capture_items, mock_genai_client): sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -554,6 +561,7 @@ def test_response_without_usage_metadata(sentry_init, capture_items, mock_genai_ sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -595,6 +603,7 @@ def test_multiple_candidates(sentry_init, capture_items, mock_genai_client): integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -659,6 +668,7 @@ def test_all_configuration_parameters(sentry_init, capture_items, mock_genai_cli sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -698,6 +708,7 @@ def test_empty_response(sentry_init, capture_items, mock_genai_client): sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -729,6 +740,7 @@ def test_response_with_different_id_fields( sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -771,6 +783,7 @@ def test_tool_with_async_function(sentry_init): sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) # Create an async tool function @@ -793,6 +806,7 @@ def test_contents_as_none(sentry_init, 
capture_items, mock_genai_client): integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -819,6 +833,7 @@ def test_tool_calls_extraction(sentry_init, capture_items, mock_genai_client): sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -905,6 +920,7 @@ def test_google_genai_message_truncation(sentry_init, capture_items, mock_genai_ integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -980,6 +996,7 @@ def test_embed_content( integrations=[GoogleGenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1041,6 +1058,7 @@ def test_embed_content_string_input(sentry_init, capture_items, mock_genai_clien integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1087,6 +1105,7 @@ def test_embed_content_error_handling(sentry_init, capture_items, mock_genai_cli sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "event") @@ -1120,6 +1139,7 @@ def test_embed_content_without_statistics( sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1159,6 +1179,7 @@ def test_embed_content_span_origin(sentry_init, capture_items, mock_genai_client sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + 
_experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1199,6 +1220,7 @@ async def test_async_embed_content( integrations=[GoogleGenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1263,6 +1285,7 @@ async def test_async_embed_content_string_input( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1312,6 +1335,7 @@ async def test_async_embed_content_error_handling( sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "event") @@ -1346,6 +1370,7 @@ async def test_async_embed_content_without_statistics( sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1388,6 +1413,7 @@ async def test_async_embed_content_span_origin( sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1419,6 +1445,7 @@ def test_generate_content_with_content_object( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1455,6 +1482,7 @@ def test_generate_content_with_dict_format( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1487,6 +1515,7 @@ def test_generate_content_with_file_data(sentry_init, capture_items, mock_genai_ 
integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1536,6 +1565,7 @@ def test_generate_content_with_inline_data( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1581,6 +1611,7 @@ def test_generate_content_with_function_response( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1635,6 +1666,7 @@ def test_generate_content_with_mixed_string_and_content( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1678,6 +1710,7 @@ def test_generate_content_with_part_object_directly( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1716,6 +1749,7 @@ def test_generate_content_with_list_of_dicts( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1752,6 +1786,7 @@ def test_generate_content_with_dict_inline_data( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1796,6 +1831,7 @@ def test_generate_content_without_parts_property_inline_data( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ 
-1839,6 +1875,7 @@ def test_generate_content_without_parts_property_inline_data_and_binary_data_wit integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") diff --git a/tests/integrations/huggingface_hub/test_huggingface_hub.py b/tests/integrations/huggingface_hub/test_huggingface_hub.py index 98abbb00fa..eaac8c1ab1 100644 --- a/tests/integrations/huggingface_hub/test_huggingface_hub.py +++ b/tests/integrations/huggingface_hub/test_huggingface_hub.py @@ -480,6 +480,7 @@ def test_text_generation( traces_sample_rate=1.0, send_default_pii=send_default_pii, integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -555,6 +556,7 @@ def test_text_generation_streaming( traces_sample_rate=1.0, send_default_pii=send_default_pii, integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -631,6 +633,7 @@ def test_chat_completion( traces_sample_rate=1.0, send_default_pii=send_default_pii, integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -709,6 +712,7 @@ def test_chat_completion_streaming( traces_sample_rate=1.0, send_default_pii=send_default_pii, integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -779,7 +783,7 @@ def test_chat_completion_streaming( def test_chat_completion_api_error( sentry_init: "Any", capture_items: "Any", mock_hf_api_with_errors: "Any" ) -> None: - sentry_init(traces_sample_rate=1.0) + sentry_init(traces_sample_rate=1.0, _experiments={"gen_ai_as_v2_spans": True}) items = 
capture_items("event", "transaction", "span") client = get_hf_provider_inference_client() @@ -838,7 +842,7 @@ def test_chat_completion_api_error( def test_span_status_error( sentry_init: "Any", capture_items: "Any", mock_hf_api_with_errors: "Any" ) -> None: - sentry_init(traces_sample_rate=1.0) + sentry_init(traces_sample_rate=1.0, _experiments={"gen_ai_as_v2_spans": True}) items = capture_items("event", "transaction", "span") client = get_hf_provider_inference_client() @@ -881,6 +885,7 @@ def test_chat_completion_with_tools( traces_sample_rate=1.0, send_default_pii=send_default_pii, integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -976,6 +981,7 @@ def test_chat_completion_streaming_with_tools( traces_sample_rate=1.0, send_default_pii=send_default_pii, integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index f709d12129..ef27d45767 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -108,6 +108,7 @@ def test_langchain_text_completion( ], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -216,6 +217,7 @@ def test_langchain_create_agent( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -336,6 +338,7 @@ def test_tool_execution_span( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -528,6 +531,7 @@ def test_langchain_openai_tools_agent( ], traces_sample_rate=1.0, 
send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -865,6 +869,7 @@ def test_langchain_error(sentry_init, capture_items): integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event", "transaction", "span") @@ -903,6 +908,7 @@ def test_span_status_error(sentry_init, capture_items): sentry_init( integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event", "transaction", "span") @@ -987,7 +993,9 @@ def _llm_type(self): def _identifying_params(self): return {} - sentry_init(integrations=[LangchainIntegration()]) + sentry_init( + integrations=[LangchainIntegration()], _experiments={"gen_ai_as_v2_spans": True} + ) # Create a manual SentryLangchainCallback manual_callback = SentryLangchainCallback( @@ -1028,6 +1036,7 @@ def test_langchain_callback_manager(sentry_init): sentry_init( integrations=[LangchainIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) local_manager = BaseCallbackManager(handlers=[]) @@ -1060,6 +1069,7 @@ def test_langchain_callback_manager_with_sentry_callback(sentry_init): sentry_init( integrations=[LangchainIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) sentry_callback = SentryLangchainCallback(0, False) local_manager = BaseCallbackManager(handlers=[sentry_callback]) @@ -1092,6 +1102,7 @@ def test_langchain_callback_list(sentry_init): sentry_init( integrations=[LangchainIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) local_callbacks = [] @@ -1124,6 +1135,7 @@ def test_langchain_callback_list_existing_callback(sentry_init): sentry_init( integrations=[LangchainIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, 
) sentry_callback = SentryLangchainCallback(0, False) local_callbacks = [sentry_callback] @@ -1161,6 +1173,7 @@ def test_langchain_message_role_mapping(sentry_init, capture_items): integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1298,6 +1311,7 @@ def test_langchain_message_truncation(sentry_init, capture_items): integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1390,6 +1404,7 @@ def test_langchain_embeddings_sync( integrations=[LangchainIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1468,6 +1483,7 @@ def test_langchain_embeddings_embed_query( integrations=[LangchainIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1542,6 +1558,7 @@ async def test_langchain_embeddings_async( integrations=[LangchainIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1614,6 +1631,7 @@ async def test_langchain_embeddings_aembed_query(sentry_init, capture_items): integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1675,6 +1693,7 @@ def test_langchain_embeddings_no_model_name(sentry_init, capture_items): sentry_init( integrations=[LangchainIntegration(include_prompts=False)], 
traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1723,7 +1742,7 @@ def test_langchain_embeddings_integration_disabled(sentry_init, capture_items): pytest.skip("langchain_openai not installed") # Initialize without LangchainIntegration - sentry_init(traces_sample_rate=1.0) + sentry_init(traces_sample_rate=1.0, _experiments={"gen_ai_as_v2_spans": True}) items = capture_items("transaction", "span") with mock.patch.object( @@ -1760,6 +1779,7 @@ def test_langchain_embeddings_multiple_providers(sentry_init, capture_items): integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1817,6 +1837,7 @@ def test_langchain_embeddings_error_handling(sentry_init, capture_items): integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1857,6 +1878,7 @@ def test_langchain_embeddings_multiple_calls(sentry_init, capture_items): integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1920,6 +1942,7 @@ def test_langchain_embeddings_span_hierarchy(sentry_init, capture_items): integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1973,6 +1996,7 @@ def test_langchain_embeddings_with_list_and_string_inputs(sentry_init, capture_i integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2037,6 +2061,7 @@ 
def test_langchain_response_model_extraction( integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2342,6 +2367,7 @@ def test_langchain_ai_system_detection( sentry_init( integrations=[LangchainIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") diff --git a/tests/integrations/litellm/test_litellm.py b/tests/integrations/litellm/test_litellm.py index 90807744e7..b9365e7008 100644 --- a/tests/integrations/litellm/test_litellm.py +++ b/tests/integrations/litellm/test_litellm.py @@ -152,6 +152,7 @@ def test_nonstreaming_chat_completion( integrations=[LiteLLMIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -233,6 +234,7 @@ async def test_async_nonstreaming_chat_completion( integrations=[LiteLLMIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -316,6 +318,7 @@ def test_streaming_chat_completion( integrations=[LiteLLMIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -386,6 +389,7 @@ async def test_async_streaming_chat_completion( integrations=[LiteLLMIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -452,6 +456,7 @@ def test_embeddings_create( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + 
_experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -521,6 +526,7 @@ async def test_async_embeddings_create( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -585,6 +591,7 @@ def test_embeddings_create_with_list_input( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -647,6 +654,7 @@ async def test_async_embeddings_create_with_list_input( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -709,6 +717,7 @@ def test_embeddings_no_pii( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=False, # PII disabled + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -765,6 +774,7 @@ async def test_async_embeddings_no_pii( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=False, # PII disabled + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -815,6 +825,7 @@ def test_exception_handling( sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event") @@ -853,6 +864,7 @@ async def test_async_exception_handling( sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event") @@ -894,6 +906,7 @@ def test_span_origin( sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = 
capture_items("transaction", "span") @@ -941,6 +954,7 @@ def test_multiple_providers( sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction") @@ -1036,6 +1050,7 @@ async def test_async_multiple_providers( sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1132,6 +1147,7 @@ def test_additional_parameters( sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1191,6 +1207,7 @@ async def test_async_additional_parameters( sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1250,6 +1267,7 @@ def test_no_integration( """Test that when integration is not enabled, callbacks don't break.""" sentry_init( traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1296,6 +1314,7 @@ async def test_async_no_integration( """Test that when integration is not enabled, callbacks don't break.""" sentry_init( traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1338,6 +1357,7 @@ def test_response_without_usage(sentry_init, capture_items): sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1379,6 +1399,7 @@ def test_integration_setup(sentry_init): sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) # Check that callbacks are registered @@ -1393,6 +1414,7 @@ def test_litellm_message_truncation(sentry_init, capture_items): 
integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1459,6 +1481,7 @@ def test_binary_content_encoding_image_url( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1538,6 +1561,7 @@ async def test_async_binary_content_encoding_image_url( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1618,6 +1642,7 @@ def test_binary_content_encoding_mixed_content( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1686,6 +1711,7 @@ async def test_async_binary_content_encoding_mixed_content( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1755,6 +1781,7 @@ def test_binary_content_encoding_uri_type( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1828,6 +1855,7 @@ async def test_async_binary_content_encoding_uri_type( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index e53f8e4f55..c4d77db5c8 100644 --- a/tests/integrations/openai/test_openai.py +++ 
b/tests/integrations/openai/test_openai.py @@ -138,6 +138,7 @@ def test_nonstreaming_chat_completion_no_prompts( integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -233,6 +234,7 @@ def test_nonstreaming_chat_completion(sentry_init, capture_items, messages, requ integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -312,6 +314,7 @@ async def test_nonstreaming_chat_completion_async_no_prompts( integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -407,6 +410,7 @@ async def test_nonstreaming_chat_completion_async( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -502,6 +506,7 @@ def test_streaming_chat_completion_no_prompts( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -621,6 +626,7 @@ def test_streaming_chat_completion_with_usage_in_stream( integrations=[OpenAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=False, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -701,6 +707,7 @@ def test_streaming_chat_completion_empty_content_preserves_token_usage( integrations=[OpenAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=False, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -764,6 +771,7 @@ async def test_streaming_chat_completion_empty_content_preserves_token_usage_asy 
integrations=[OpenAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=False, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -829,6 +837,7 @@ async def test_streaming_chat_completion_async_with_usage_in_stream( integrations=[OpenAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=False, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -957,6 +966,7 @@ def test_streaming_chat_completion( ], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1109,6 +1119,7 @@ async def test_streaming_chat_completion_async_no_prompts( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1280,6 +1291,7 @@ async def test_streaming_chat_completion_async( ], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1411,7 +1423,11 @@ async def test_streaming_chat_completion_async( def test_bad_chat_completion(sentry_init, capture_items): - sentry_init(integrations=[OpenAIIntegration()], traces_sample_rate=1.0) + sentry_init( + integrations=[OpenAIIntegration()], + traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, + ) items = capture_items("event") client = OpenAI(api_key="z") @@ -1429,7 +1445,11 @@ def test_bad_chat_completion(sentry_init, capture_items): def test_span_status_error(sentry_init, capture_items): - sentry_init(integrations=[OpenAIIntegration()], traces_sample_rate=1.0) + sentry_init( + integrations=[OpenAIIntegration()], + traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, + ) items = capture_items("event", "transaction", "span") with start_transaction(name="test"): @@ -1454,7 +1474,11 @@ def test_span_status_error(sentry_init, capture_items): @pytest.mark.asyncio async def 
test_bad_chat_completion_async(sentry_init, capture_items): - sentry_init(integrations=[OpenAIIntegration()], traces_sample_rate=1.0) + sentry_init( + integrations=[OpenAIIntegration()], + traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, + ) items = capture_items("event") client = AsyncOpenAI(api_key="z") @@ -1485,6 +1509,7 @@ def test_embeddings_create_no_pii( integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1567,6 +1592,7 @@ def test_embeddings_create(sentry_init, capture_items, input, request): integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1638,6 +1664,7 @@ async def test_embeddings_create_async_no_pii( integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1721,6 +1748,7 @@ async def test_embeddings_create_async(sentry_init, capture_items, input, reques integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1789,6 +1817,7 @@ def test_embeddings_create_raises_error( integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event") @@ -1817,6 +1846,7 @@ async def test_embeddings_create_raises_error_async( integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event") @@ -1837,6 +1867,7 @@ def 
test_span_origin_nonstreaming_chat(sentry_init, capture_items): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1860,6 +1891,7 @@ async def test_span_origin_nonstreaming_chat_async(sentry_init, capture_items): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1882,6 +1914,7 @@ def test_span_origin_streaming_chat(sentry_init, capture_items): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1945,6 +1978,7 @@ async def test_span_origin_streaming_chat_async( sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2011,6 +2045,7 @@ def test_span_origin_embeddings(sentry_init, capture_items): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2042,6 +2077,7 @@ async def test_span_origin_embeddings_async(sentry_init, capture_items): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2435,6 +2471,7 @@ def test_ai_client_span_responses_api_no_pii(sentry_init, capture_items): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -2557,6 +2594,7 @@ def test_ai_client_span_responses_api( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -2767,6 +2805,7 
@@ def test_error_in_responses_api(sentry_init, capture_items): integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event", "transaction", "span") @@ -2873,6 +2912,7 @@ async def test_ai_client_span_responses_async_api( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -3158,6 +3198,7 @@ async def test_ai_client_span_streaming_responses_async_api( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -3383,6 +3424,7 @@ async def test_error_in_responses_async_api(sentry_init, capture_items): integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event", "transaction", "span") @@ -3510,6 +3552,7 @@ def test_streaming_responses_api( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -3586,6 +3629,7 @@ async def test_streaming_responses_api_async( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -3649,6 +3693,7 @@ def test_empty_tools_in_chat_completion(sentry_init, capture_items, tools): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -3691,6 +3736,7 @@ def test_openai_message_role_mapping( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -3721,6 +3767,7 @@ def 
test_openai_message_truncation(sentry_init, capture_items): integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -3770,6 +3817,7 @@ def test_streaming_chat_completion_ttft( sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -3848,6 +3896,7 @@ async def test_streaming_chat_completion_ttft_async( sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -3924,6 +3973,7 @@ def test_streaming_responses_api_ttft( sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -3973,6 +4023,7 @@ async def test_streaming_responses_api_ttft_async( sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py index 294812b0ca..9e74848a04 100644 --- a/tests/integrations/openai_agents/test_openai_agents.py +++ b/tests/integrations/openai_agents/test_openai_agents.py @@ -182,6 +182,7 @@ async def test_agent_invocation_span_no_pii( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=False, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -339,6 +340,7 @@ async def test_agent_invocation_span( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -523,6 +525,7 @@ async def test_client_span_custom_model( sentry_init( 
integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -570,6 +573,7 @@ def test_agent_invocation_span_sync_no_pii( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=False, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -721,6 +725,7 @@ def test_agent_invocation_span_sync( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -963,6 +968,7 @@ async def test_handoff_span(sentry_init, capture_items, get_model_response): sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1092,6 +1098,7 @@ async def test_max_turns_before_handoff_span( sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1190,6 +1197,7 @@ def simple_test_tool(message: str) -> str: integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1418,6 +1426,7 @@ async def test_hosted_mcp_tool_propagation_header_streamed( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, release="d08ebdb9309e1b004c6f52202de58a09c2268e42", + _experiments={"gen_ai_as_v2_spans": True}, ) request_headers = {} @@ -1580,6 +1589,7 @@ async def test_hosted_mcp_tool_propagation_headers( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, release="d08ebdb9309e1b004c6f52202de58a09c2268e42", + _experiments={"gen_ai_as_v2_spans": True}, ) response = get_model_response(EXAMPLE_RESPONSE, serialize_pydantic=True) @@ -1678,6 +1688,7 @@ def 
simple_test_tool(message: str) -> str: integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -1726,6 +1737,7 @@ async def test_error_handling(sentry_init, capture_items, test_agent): LoggingIntegration(event_level=logging.CRITICAL), ], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event", "span", "transaction") @@ -1791,6 +1803,7 @@ async def test_error_captures_input_data(sentry_init, capture_items, test_agent) ], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event", "span") @@ -1835,6 +1848,7 @@ async def test_span_status_error(sentry_init, capture_items, test_agent): LoggingIntegration(event_level=logging.CRITICAL), ], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event", "transaction", "span") @@ -1948,6 +1962,7 @@ async def test_mcp_tool_execution_spans( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -2078,6 +2093,7 @@ async def test_mcp_tool_execution_with_error( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -2202,6 +2218,7 @@ async def test_mcp_tool_execution_without_pii( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=False, # PII disabled + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -2259,6 +2276,7 @@ async def test_multiple_agents_asyncio( sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ 
-2302,6 +2320,7 @@ def test_openai_agents_message_role_mapping( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) get_response_kwargs = {"input": [test_message]} @@ -2401,6 +2420,7 @@ def failing_tool(message: str) -> str: integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -2498,6 +2518,7 @@ async def test_invoke_agent_span_includes_usage_data( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -2591,6 +2612,7 @@ async def test_ai_client_span_includes_response_model( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -2679,6 +2701,7 @@ async def test_ai_client_span_response_model_with_chat_completions( sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -2801,6 +2824,7 @@ def calculator(a: int, b: int) -> int: integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -2889,6 +2913,7 @@ async def test_invoke_agent_span_includes_response_model( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -3023,6 +3048,7 @@ def calculator(a: int, b: int) -> int: integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", 
"transaction") @@ -3065,6 +3091,7 @@ def test_openai_agents_message_truncation(sentry_init, capture_items): integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) test_messages = [ @@ -3111,6 +3138,7 @@ async def test_streaming_span_update_captures_response_data( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) # Create a mock streaming response object (similar to what we'd get from ResponseCompletedEvent) @@ -3176,6 +3204,7 @@ async def test_streaming_ttft_on_chat_span( sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) request_headers = {} @@ -3330,6 +3359,7 @@ async def test_conversation_id_on_all_spans( sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -3468,6 +3498,7 @@ def simple_tool(message: str) -> str: sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -3531,6 +3562,7 @@ async def test_no_conversation_id_when_not_provided( sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") diff --git a/tests/integrations/pydantic_ai/test_pydantic_ai.py b/tests/integrations/pydantic_ai/test_pydantic_ai.py index fe34dd0f5d..9faccb0a84 100644 --- a/tests/integrations/pydantic_ai/test_pydantic_ai.py +++ b/tests/integrations/pydantic_ai/test_pydantic_ai.py @@ -61,6 +61,7 @@ async def test_agent_run_async(sentry_init, capture_items, get_test_agent): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, 
) items = capture_items("transaction", "span") @@ -102,6 +103,7 @@ async def test_agent_run_async_model_error(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event", "transaction", "span") @@ -135,6 +137,7 @@ async def test_agent_run_async_usage_data(sentry_init, capture_items, get_test_a integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -179,6 +182,7 @@ def test_agent_run_sync(sentry_init, capture_items, get_test_agent): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -211,6 +215,7 @@ def test_agent_run_sync_model_error(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event", "transaction", "span") @@ -244,6 +249,7 @@ async def test_agent_run_stream(sentry_init, capture_items, get_test_agent): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -288,6 +294,7 @@ async def test_agent_run_stream_events(sentry_init, capture_items, get_test_agen integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -322,6 +329,7 @@ async def test_agent_with_tools(sentry_init, capture_items, get_test_agent): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) test_agent = get_test_agent() @@ -387,6 +395,7 @@ async def 
test_agent_with_tool_model_retry( ], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) retries = 0 @@ -470,6 +479,7 @@ async def test_agent_with_tool_validation_error( ], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) test_agent = get_test_agent() @@ -534,6 +544,7 @@ async def test_agent_with_tools_streaming(sentry_init, capture_items, get_test_a integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) test_agent = get_test_agent() @@ -583,6 +594,7 @@ async def test_model_settings(sentry_init, capture_items, get_test_agent_with_se sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -631,6 +643,7 @@ async def test_system_prompt_attribute( integrations=[PydanticAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -676,6 +689,7 @@ async def test_error_handling(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -700,6 +714,7 @@ async def test_without_pii(sentry_init, capture_items, get_test_agent): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=False, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -729,6 +744,7 @@ async def test_without_pii_tools(sentry_init, capture_items, get_test_agent): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=False, + _experiments={"gen_ai_as_v2_spans": True}, ) test_agent = get_test_agent() @@ -765,6 +781,7 @@ async def 
test_multiple_agents_concurrent(sentry_init, capture_items, get_test_a sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -799,6 +816,7 @@ async def test_message_history(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -848,6 +866,7 @@ async def test_gen_ai_system(sentry_init, capture_items, get_test_agent): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -878,6 +897,7 @@ async def test_include_prompts_false(sentry_init, capture_items, get_test_agent) integrations=[PydanticAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=True, # Even with PII enabled, prompts should not be captured + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -907,6 +927,7 @@ async def test_include_prompts_true(sentry_init, capture_items, get_test_agent): integrations=[PydanticAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -938,6 +959,7 @@ async def test_include_prompts_false_with_tools( integrations=[PydanticAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) test_agent = get_test_agent() @@ -975,6 +997,7 @@ async def test_include_prompts_requires_pii(sentry_init, capture_items, get_test integrations=[PydanticAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=False, # PII disabled + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1066,6 
+1089,7 @@ async def mock_map_tool_result_part(part): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1135,6 +1159,7 @@ async def test_context_cleanup_after_run(sentry_init, get_test_agent): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) # Verify context is not set before run @@ -1158,6 +1183,7 @@ def test_context_cleanup_after_run_sync(sentry_init, get_test_agent): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) # Verify context is not set before run @@ -1182,6 +1208,7 @@ async def test_context_cleanup_after_streaming(sentry_init, get_test_agent): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) # Verify context is not set before run @@ -1208,6 +1235,7 @@ async def test_context_cleanup_on_error(sentry_init, get_test_agent): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) test_agent = get_test_agent() @@ -1242,6 +1270,7 @@ async def test_context_isolation_concurrent_agents(sentry_init, get_test_agent): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) # Create a second agent @@ -1297,6 +1326,7 @@ async def test_invoke_agent_with_list_user_prompt(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1347,6 +1377,7 @@ async def test_invoke_agent_with_instructions( integrations=[PydanticAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + 
_experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1386,6 +1417,7 @@ async def test_model_name_extraction_with_callable(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) # Test the utility function directly @@ -1412,6 +1444,7 @@ async def test_model_name_extraction_fallback_to_str(sentry_init, capture_items) sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) # Test the utility function directly @@ -1440,6 +1473,7 @@ async def test_model_settings_object_style(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -1475,6 +1509,7 @@ async def test_usage_data_partial(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1508,6 +1543,7 @@ async def test_agent_data_from_scope(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1530,6 +1566,7 @@ async def test_available_tools_without_description( sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) test_agent = get_test_agent() @@ -1564,6 +1601,7 @@ async def test_output_with_tool_calls(sentry_init, capture_items, get_test_agent integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) test_agent = get_test_agent() @@ -1609,6 +1647,7 @@ async def 
test_message_formatting_with_different_parts(sentry_init, capture_item integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1657,6 +1696,7 @@ async def test_update_invoke_agent_span_with_none_output(sentry_init, capture_it integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -1684,6 +1724,7 @@ async def test_update_ai_client_span_with_none_response(sentry_init, capture_ite sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -1709,6 +1750,7 @@ async def test_agent_without_name(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1734,6 +1776,7 @@ async def test_model_response_without_parts(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -1764,6 +1807,7 @@ async def test_input_messages_error_handling(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -1793,6 +1837,7 @@ async def test_available_tools_error_handling(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", 
name="test") as transaction: @@ -1822,6 +1867,7 @@ async def test_set_usage_data_with_none_usage(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -1848,6 +1894,7 @@ async def test_set_usage_data_with_partial_fields(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -1889,6 +1936,7 @@ def test_tool(x: int) -> int: integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1918,6 +1966,7 @@ async def test_message_parts_with_list_content(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -1954,6 +2003,7 @@ async def test_output_data_with_text_and_tool_calls(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -1993,6 +2043,7 @@ async def test_output_data_error_handling(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2025,6 +2076,7 @@ async def test_message_with_system_prompt_part(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": 
True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2060,6 +2112,7 @@ async def test_message_with_instructions(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2094,6 +2147,7 @@ async def test_set_input_messages_without_prompts(sentry_init, capture_items): integrations=[PydanticAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2122,6 +2176,7 @@ async def test_set_output_data_without_prompts(sentry_init, capture_items): integrations=[PydanticAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2149,6 +2204,7 @@ async def test_get_model_name_with_exception_in_callable(sentry_init, capture_it sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) # Create model with callable name that raises exception @@ -2172,6 +2228,7 @@ async def test_get_model_name_with_string_model(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) # Pass a string as model @@ -2191,6 +2248,7 @@ async def test_get_model_name_with_none(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) # Pass None @@ -2212,6 +2270,7 @@ async def test_set_model_data_with_system(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + 
_experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2243,6 +2302,7 @@ async def test_set_model_data_from_agent_scope(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2276,6 +2336,7 @@ async def test_set_model_data_with_none_settings_values(sentry_init, capture_ite sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2308,6 +2369,7 @@ async def test_should_send_prompts_without_pii(sentry_init, capture_items): integrations=[PydanticAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=False, # PII disabled + _experiments={"gen_ai_as_v2_spans": True}, ) # Should return False @@ -2326,6 +2388,7 @@ async def test_set_agent_data_without_agent(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2352,6 +2415,7 @@ async def test_set_agent_data_from_scope(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2384,6 +2448,7 @@ async def test_set_agent_data_without_name(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2414,6 +2479,7 @@ async def test_set_available_tools_without_toolset(sentry_init, capture_items): sentry_init( 
integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2444,6 +2510,7 @@ async def test_set_available_tools_with_schema(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2483,6 +2550,7 @@ async def test_execute_tool_span_creation(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2509,6 +2577,7 @@ async def test_execute_tool_span_with_mcp_type(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2536,6 +2605,7 @@ async def test_execute_tool_span_without_prompts(sentry_init, capture_items): integrations=[PydanticAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2560,6 +2630,7 @@ async def test_execute_tool_span_with_none_args(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2583,6 +2654,7 @@ async def test_update_execute_tool_span_with_none_span(sentry_init, capture_item sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) # Update with None span - should not raise @@ 
-2607,6 +2679,7 @@ async def test_update_execute_tool_span_with_none_result(sentry_init, capture_it integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2630,6 +2703,7 @@ async def test_tool_execution_without_span_context(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) # Create a simple agent with no tools (won't have function_toolset) @@ -2661,6 +2735,7 @@ async def test_invoke_agent_span_with_callable_instruction(sentry_init, capture_ integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2694,6 +2769,7 @@ async def test_invoke_agent_span_with_string_instructions(sentry_init, capture_i integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2722,6 +2798,7 @@ async def test_ai_client_span_with_streaming_flag(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2749,6 +2826,7 @@ async def test_ai_client_span_gets_agent_from_scope(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2797,6 +2875,7 @@ async def test_binary_content_encoding_image(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, 
send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2827,6 +2906,7 @@ async def test_binary_content_encoding_mixed_content(sentry_init, capture_items) integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2870,6 +2950,7 @@ async def test_binary_content_in_agent_run(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2893,7 +2974,11 @@ async def test_binary_content_in_agent_run(sentry_init, capture_items): @pytest.mark.asyncio async def test_set_usage_data_with_cache_tokens(sentry_init, capture_items): """Test that cache_read_tokens and cache_write_tokens are tracked.""" - sentry_init(integrations=[PydanticAIIntegration()], traces_sample_rate=1.0) + sentry_init( + integrations=[PydanticAIIntegration()], + traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, + ) items = capture_items("transaction", "span") @@ -2964,6 +3049,7 @@ def test_image_url_base64_content_in_span( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -3034,6 +3120,7 @@ async def test_invoke_agent_image_url( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) agent = Agent("test", name="test_image_url_agent") @@ -3081,6 +3168,7 @@ def multiply_numbers(a: int, b: int) -> int: integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") From 9b20bd24b001af3953f6d4094d6461eea2c58231 Mon Sep 17 00:00:00 2001 
From: Alexander Alderman Webb Date: Fri, 17 Apr 2026 15:17:29 +0200 Subject: [PATCH 36/84] add experimental option to langgraph tests --- tests/integrations/langgraph/test_langgraph.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tests/integrations/langgraph/test_langgraph.py b/tests/integrations/langgraph/test_langgraph.py index e1a3baa0a8..b70889548f 100644 --- a/tests/integrations/langgraph/test_langgraph.py +++ b/tests/integrations/langgraph/test_langgraph.py @@ -154,6 +154,7 @@ def test_state_graph_compile( integrations=[LanggraphIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") graph = MockStateGraph() @@ -209,6 +210,7 @@ def test_pregel_invoke(sentry_init, capture_items, send_default_pii, include_pro integrations=[LanggraphIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -311,6 +313,7 @@ def test_pregel_ainvoke(sentry_init, capture_items, send_default_pii, include_pr integrations=[LanggraphIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") test_state = {"messages": [MockMessage("What's the weather like?", name="user")]} @@ -391,6 +394,7 @@ def test_pregel_invoke_error(sentry_init, capture_items): integrations=[LanggraphIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") test_state = {"messages": [MockMessage("This will fail")]} @@ -421,6 +425,7 @@ def test_pregel_ainvoke_error(sentry_init, capture_items): integrations=[LanggraphIntegration(include_prompts=True)], 
traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") test_state = {"messages": [MockMessage("This will fail async")]} @@ -455,6 +460,7 @@ def test_span_origin(sentry_init, capture_items): sentry_init( integrations=[LanggraphIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -486,6 +492,7 @@ def test_pregel_invoke_with_different_graph_names( integrations=[LanggraphIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -529,6 +536,7 @@ def test_pregel_invoke_span_includes_usage_data(sentry_init, capture_items): sentry_init( integrations=[LanggraphIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -605,6 +613,7 @@ def test_pregel_ainvoke_span_includes_usage_data(sentry_init, capture_items): sentry_init( integrations=[LanggraphIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -684,6 +693,7 @@ def test_pregel_invoke_multiple_llm_calls_aggregate_usage(sentry_init, capture_i sentry_init( integrations=[LanggraphIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -765,6 +775,7 @@ def test_pregel_ainvoke_multiple_llm_calls_aggregate_usage(sentry_init, capture_ sentry_init( integrations=[LanggraphIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -849,6 +860,7 @@ def test_pregel_invoke_span_includes_response_model(sentry_init, capture_items): sentry_init( integrations=[LanggraphIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = 
capture_items("transaction", "span") @@ -921,6 +933,7 @@ def test_pregel_ainvoke_span_includes_response_model(sentry_init, capture_items) sentry_init( integrations=[LanggraphIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -996,6 +1009,7 @@ def test_pregel_invoke_span_uses_last_response_model(sentry_init, capture_items) sentry_init( integrations=[LanggraphIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1080,6 +1094,7 @@ def test_pregel_ainvoke_span_uses_last_response_model(sentry_init, capture_items sentry_init( integrations=[LanggraphIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1212,6 +1227,7 @@ def test_extraction_functions_complex_scenario(sentry_init, capture_items): integrations=[LanggraphIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1287,6 +1303,7 @@ def test_langgraph_message_role_mapping(sentry_init, capture_items): integrations=[LanggraphIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1361,6 +1378,7 @@ def test_langgraph_message_truncation(sentry_init, capture_items): integrations=[LanggraphIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") From 88fc76ebaaf757b6f79db84f25eb5a1f9d5c858e Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Mon, 20 Apr 2026 15:00:15 +0200 Subject: [PATCH 37/84] cleanup --- .../integrations/anthropic/test_anthropic.py | 26 ++++++------ .../google_genai/test_google_genai.py | 10 
+---- .../integrations/langchain/test_langchain.py | 4 +- tests/integrations/openai/test_openai.py | 8 ++-- .../openai_agents/test_openai_agents.py | 40 +++++-------------- .../pydantic_ai/test_pydantic_ai.py | 1 + 6 files changed, 31 insertions(+), 58 deletions(-) diff --git a/tests/integrations/anthropic/test_anthropic.py b/tests/integrations/anthropic/test_anthropic.py index b19cca9347..f38443bc94 100644 --- a/tests/integrations/anthropic/test_anthropic.py +++ b/tests/integrations/anthropic/test_anthropic.py @@ -3725,7 +3725,7 @@ def test_message_with_base64_pdf(sentry_init, capture_items): "source": { "type": "base64", "media_type": "application/pdf", - "attributes": "JVBERi0xLjQKJeLj...base64pdfdata", + "data": "JVBERi0xLjQKJeLj...base64pdfdata", }, }, ], @@ -3859,7 +3859,7 @@ def test_message_with_mixed_content(sentry_init, capture_items): "source": { "type": "base64", "media_type": "image/png", - "attributes": "iVBORw0KGgo...base64imagedata", + "data": "iVBORw0KGgo...base64imagedata", }, }, { @@ -3874,7 +3874,7 @@ def test_message_with_mixed_content(sentry_init, capture_items): "source": { "type": "base64", "media_type": "application/pdf", - "attributes": "JVBERi0xLjQK...base64pdfdata", + "data": "JVBERi0xLjQK...base64pdfdata", }, }, {"type": "text", "text": "Please provide a detailed analysis."}, @@ -3941,7 +3941,7 @@ def test_message_with_multiple_images_different_formats(sentry_init, capture_ite "source": { "type": "base64", "media_type": "image/jpeg", - "attributes": "base64data1...", + "data": "base64data1...", }, }, { @@ -4017,7 +4017,7 @@ def test_binary_content_not_stored_when_pii_disabled(sentry_init, capture_items) "source": { "type": "base64", "media_type": "image/jpeg", - "attributes": "base64encodeddatahere...", + "data": "base64encodeddatahere...", }, }, ], @@ -4056,7 +4056,7 @@ def test_binary_content_not_stored_when_prompts_disabled(sentry_init, capture_it "source": { "type": "base64", "media_type": "image/jpeg", - "attributes": 
"base64encodeddatahere...", + "data": "base64encodeddatahere...", }, }, ], @@ -4106,7 +4106,7 @@ def test_cache_tokens_nonstreaming(sentry_init, capture_items): model="claude-3-5-sonnet-20241022", ) - (span,) = [item.payload for item in items if item.type == "span"] + (span,) = (item.payload for item in items if item.type == "span") # input_tokens normalized: 100 + 80 (cache_read) + 20 (cache_write) = 200 assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 200 assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 50 @@ -4158,7 +4158,7 @@ def test_input_tokens_include_cache_write_nonstreaming(sentry_init, capture_item model="claude-sonnet-4-20250514", ) - (span,) = [item.payload for item in items if item.type == "span"] + (span,) = (item.payload for item in items if item.type == "span") # input_tokens should be total: 19 (non-cached) + 2846 (cache_write) = 2865 assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 2865 @@ -4281,7 +4281,7 @@ def test_input_tokens_include_cache_read_streaming( ): pass - (span,) = [item.payload for item in items if item.type == "span"] + (span,) = (item.payload for item in items if item.type == "span") # input_tokens should be total: 19 + 2846 = test_stream_messages_input_tokens_include_cache_read_streaming assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 2865 @@ -4351,7 +4351,7 @@ def test_stream_messages_input_tokens_include_cache_read_streaming( for event in stream: pass - (span,) = [item.payload for item in items if item.type == "span"] + (span,) = (item.payload for item in items if item.type == "span") # input_tokens should be total: 19 + 2846 = 2865 assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 2865 @@ -4396,7 +4396,7 @@ def test_input_tokens_unchanged_without_caching(sentry_init, capture_items): model="claude-sonnet-4-20250514", ) - (span,) = [item.payload for item in items if item.type == "span"] + (span,) = (item.payload for item in items if item.type == 
"span") assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 20 assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 32 # 20 + 12 @@ -4460,7 +4460,7 @@ def test_cache_tokens_streaming( ): pass - (span,) = [item.payload for item in items if item.type == "span"] + (span,) = (item.payload for item in items if item.type == "span") # input_tokens normalized: 100 + 80 (cache_read) + 20 (cache_write) = 200 assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 200 assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 @@ -4524,7 +4524,7 @@ def test_stream_messages_cache_tokens( for event in stream: pass - (span,) = [item.payload for item in items if item.type == "span"] + (span,) = (item.payload for item in items if item.type == "span") # input_tokens normalized: 100 + 80 (cache_read) + 20 (cache_write) = 200 assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 200 assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 diff --git a/tests/integrations/google_genai/test_google_genai.py b/tests/integrations/google_genai/test_google_genai.py index ae31fe565b..62c0530c31 100644 --- a/tests/integrations/google_genai/test_google_genai.py +++ b/tests/integrations/google_genai/test_google_genai.py @@ -400,8 +400,6 @@ def test_error_handling(sentry_init, capture_items, mock_genai_client): config=create_test_config(), ) - # Should have both transaction and error events - assert len([item for item in items if item.type == "transaction"]) == 1 (error_event,) = (item.payload for item in items if item.type == "event") assert error_event["level"] == "error" @@ -1122,8 +1120,6 @@ def test_embed_content_error_handling(sentry_init, capture_items, mock_genai_cli contents=["This will fail"], ) - # Should have both transaction and error events - assert len([item for item in items if item.type == "transaction"]) == 1 (error_event,) = (item.payload for item in items if item.type == "event") assert error_event["level"] == 
"error" @@ -1352,8 +1348,6 @@ async def test_async_embed_content_error_handling( contents=["This will fail"], ) - # Should have both transaction and error events - assert len([item for item in items if item.type == "transaction"]) == 1 (error_event,) = (item.payload for item in items if item.type == "event") assert error_event["level"] == "error" @@ -2186,9 +2180,7 @@ def test_extract_contents_messages_dict_inline_data(): """Test extract_contents_messages with dict containing inline_data""" content_dict = { "role": "user", - "parts": [ - {"inline_data": {"attributes": b"binary_data", "mime_type": "image/gif"}} - ], + "parts": [{"inline_data": {"data": b"binary_data", "mime_type": "image/gif"}}], } result = extract_contents_messages(content_dict) diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index ef27d45767..243a059432 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -897,7 +897,7 @@ def test_langchain_error(sentry_init, capture_items): with start_transaction(), pytest.raises(ValueError): list(agent_executor.stream({"input": "How many letters in the word eudca"})) - error = next(item.payload for item in items if item.type == "event") + (error,) = (item.payload for item in items if item.type == "event") assert error["level"] == "error" @@ -939,7 +939,7 @@ def test_span_status_error(sentry_init, capture_items): with pytest.raises(ValueError): list(agent_executor.stream({"input": "How many letters in the word eudca"})) - error = next(item.payload for item in items if item.type == "event") + (error,) = (item.payload for item in items if item.type == "event") assert error["level"] == "error" spans = [item.payload for item in items if item.type == "span"] diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index c4d77db5c8..8263dedc70 100644 --- a/tests/integrations/openai/test_openai.py +++ 
b/tests/integrations/openai/test_openai.py @@ -1462,8 +1462,8 @@ def test_span_status_error(sentry_init, capture_items): model="some-model", messages=[{"role": "system", "content": "hello"}] ) - (event,) = (item.payload for item in items if item.type == "event") - assert event["level"] == "error" + (error,) = (item.payload for item in items if item.type == "event") + assert error["level"] == "error" spans = [item.payload for item in items if item.type == "span"] assert spans[0]["status"] == "error" @@ -1964,8 +1964,8 @@ def test_span_origin_streaming_chat(sentry_init, capture_items): "".join(map(lambda x: x.choices[0].delta.content, response_stream)) - (transaction,) = (item.payload for item in items if item.type == "transaction") - assert transaction["contexts"]["trace"]["origin"] == "manual" + (event,) = (item.payload for item in items if item.type == "transaction") + assert event["contexts"]["trace"]["origin"] == "manual" spans = [item.payload for item in items if item.type == "span"] assert spans[0]["attributes"]["sentry.origin"] == "auto.ai.openai" diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py index 9e74848a04..ffcf8685a7 100644 --- a/tests/integrations/openai_agents/test_openai_agents.py +++ b/tests/integrations/openai_agents/test_openai_agents.py @@ -204,9 +204,7 @@ async def test_agent_invocation_span_no_pii( span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT ) - transactions = [item.payload for item in items if item.type == "transaction"] - assert len(transactions) == 1 - transaction = transactions[0] + (transaction,) = (item.payload for item in items if item.type == "transaction") assert transaction["transaction"] == "test_agent workflow" assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" @@ -354,9 +352,7 @@ async def test_agent_invocation_span( assert result is not None assert result.final_output == "Hello, how can I help 
you?" - transactions = [item.payload for item in items if item.type == "transaction"] - assert len(transactions) == 1 - transaction = transactions[0] + (transaction,) = (item.payload for item in items if item.type == "transaction") assert transaction["transaction"] == "test_agent workflow" assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" @@ -583,9 +579,7 @@ def test_agent_invocation_span_sync_no_pii( assert result is not None assert result.final_output == "Hello, how can I help you?" - transactions = [item.payload for item in items if item.type == "transaction"] - assert len(transactions) == 1 - transaction = transactions[0] + (transaction,) = (item.payload for item in items if item.type == "transaction") assert transaction["transaction"] == "test_agent workflow" assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" @@ -739,9 +733,7 @@ def test_agent_invocation_span_sync( assert result is not None assert result.final_output == "Hello, how can I help you?" 
- transactions = [item.payload for item in items if item.type == "transaction"] - assert len(transactions) == 1 - transaction = transactions[0] + (transaction,) = (item.payload for item in items if item.type == "transaction") assert transaction["transaction"] == "test_agent workflow" assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" @@ -1208,9 +1200,7 @@ def simple_test_tool(message: str) -> str: run_config=test_run_config, ) - transactions = [item.payload for item in items if item.type == "transaction"] - assert len(transactions) == 1 - transaction = transactions[0] + (transaction,) = (item.payload for item in items if item.type == "transaction") assert transaction["transaction"] == "test_agent workflow" assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" @@ -1700,9 +1690,7 @@ def simple_test_tool(message: str) -> str: run_config=test_run_config, ) - transactions = [item.payload for item in items if item.type == "transaction"] - assert len(transactions) == 1 - transaction = transactions[0] + (transaction,) = (item.payload for item in items if item.type == "transaction") assert transaction["transaction"] == "test_agent workflow" assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" @@ -1747,16 +1735,12 @@ async def test_error_handling(sentry_init, capture_items, test_agent): test_agent, "Test input", run_config=test_run_config ) - error_events = [item.payload for item in items if item.type == "event"] - assert len(error_events) == 1 - error_event = error_events[0] + (error_event,) = (item.payload for item in items if item.type == "event") assert error_event["exception"]["values"][0]["type"] == "Exception" assert error_event["exception"]["values"][0]["value"] == "Model Error" assert error_event["exception"]["values"][0]["mechanism"]["type"] == "openai_agents" - transactions = [item.payload for item in items if item.type == "transaction"] - assert len(transactions) == 1 - transaction = 
transactions[0] + (transaction,) = (item.payload for item in items if item.type == "transaction") assert transaction["transaction"] == "test_agent workflow" assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" @@ -1811,9 +1795,7 @@ async def test_error_captures_input_data(sentry_init, capture_items, test_agent) with pytest.raises(InternalServerError, match="Error code: 500"): await agents.Runner.run(agent, "Test input", run_config=test_run_config) - error_events = [item.payload for item in items if item.type == "event"] - assert len(error_events) == 1 - error_event = error_events[0] + (error_event,) = (item.payload for item in items if item.type == "event") assert error_event["exception"]["values"][0]["type"] == "InternalServerError" assert error_event["exception"]["values"][0]["value"] == "Error code: 500" @@ -3574,9 +3556,7 @@ async def test_no_conversation_id_when_not_provided( assert result is not None - transactions = [item.payload for item in items if item.type == "transaction"] - assert len(transactions) == 1 - transaction = transactions[0] + (transaction,) = (item.payload for item in items if item.type == "transaction") spans = [item.payload for item in items if item.type == "span"] invoke_agent_span = next( diff --git a/tests/integrations/pydantic_ai/test_pydantic_ai.py b/tests/integrations/pydantic_ai/test_pydantic_ai.py index 9faccb0a84..571d82279f 100644 --- a/tests/integrations/pydantic_ai/test_pydantic_ai.py +++ b/tests/integrations/pydantic_ai/test_pydantic_ai.py @@ -798,6 +798,7 @@ async def run_agent(input_text): # Verify each transaction is separate events = [item.payload for item in items if item.type == "transaction"] + assert len(events) == 3 for i, transaction in enumerate(events): assert transaction["transaction"] == "invoke_agent test_agent" From 08af4b4f5cde74e2f46a1dbe9ff651c27ff57658 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Mon, 20 Apr 2026 17:16:28 +0200 Subject: [PATCH 38/84] remove 
experimental option --- sentry_sdk/_types.py | 2 +- sentry_sdk/client.py | 6 +- sentry_sdk/consts.py | 1 - sentry_sdk/tracing.py | 18 ++-- .../integrations/anthropic/test_anthropic.py | 53 ------------ .../google_genai/test_google_genai.py | 37 -------- .../huggingface_hub/test_huggingface_hub.py | 6 -- .../integrations/langchain/test_langchain.py | 24 ------ .../integrations/langgraph/test_langgraph.py | 18 ---- tests/integrations/litellm/test_litellm.py | 28 ------ tests/integrations/openai/test_openai.py | 42 --------- .../openai_agents/test_openai_agents.py | 32 ------- .../pydantic_ai/test_pydantic_ai.py | 85 ------------------- 13 files changed, 17 insertions(+), 335 deletions(-) diff --git a/sentry_sdk/_types.py b/sentry_sdk/_types.py index baf5f6a2fd..fbb9a166b8 100644 --- a/sentry_sdk/_types.py +++ b/sentry_sdk/_types.py @@ -12,7 +12,6 @@ SENSITIVE_DATA_SUBSTITUTE = "[Filtered]" -BLOB_DATA_SUBSTITUTE = "[Blob substitute]" class AnnotatedValue: @@ -209,6 +208,7 @@ class SDKInfo(TypedDict): "type": Literal["check_in", "transaction"], "user": dict[str, object], "_dropped_spans": int, + "_has_gen_ai_span": bool, }, total=False, ) diff --git a/sentry_sdk/client.py b/sentry_sdk/client.py index 87504c94b1..fd102e0679 100644 --- a/sentry_sdk/client.py +++ b/sentry_sdk/client.py @@ -1116,9 +1116,9 @@ def capture_event( if is_transaction and isinstance(profile, Profile): envelope.add_profile(profile.to_json(event_opt, self.options)) - if is_transaction and not self.options["_experiments"].get( - "gen_ai_as_v2_spans", False - ): + span_recorder_has_gen_ai_span = event.pop("_has_gen_ai_span", False) + + if is_transaction and not span_recorder_has_gen_ai_span: envelope.add_transaction(event_opt) elif is_transaction: split_spans = _split_gen_ai_spans(event_opt) diff --git a/sentry_sdk/consts.py b/sentry_sdk/consts.py index 82107b49ee..73e5a6d9cb 100644 --- a/sentry_sdk/consts.py +++ b/sentry_sdk/consts.py @@ -86,7 +86,6 @@ class CompressionAlgo(Enum): "trace_lifecycle": 
Optional[Literal["static", "stream"]], "ignore_spans": Optional[IgnoreSpansConfig], "suppress_asgi_chained_exceptions": Optional[bool], - "gen_ai_as_v2_spans": Optional[bool], }, total=False, ) diff --git a/sentry_sdk/tracing.py b/sentry_sdk/tracing.py index 7f2baba0c9..6c8cbb87e4 100644 --- a/sentry_sdk/tracing.py +++ b/sentry_sdk/tracing.py @@ -1042,11 +1042,16 @@ def finish( return None - finished_spans = [ - span.to_json() - for span in self._span_recorder.spans - if span.timestamp is not None - ] + finished_spans = [] + has_gen_ai_span = False + for span in self._span_recorder.spans: + if span.timestamp is None: + continue + + if isinstance(span.op, str) and span.op.startswith("gen_ai."): + has_gen_ai_span = True + + finished_spans.append(span.to_json()) len_diff = len(self._span_recorder.spans) - len(finished_spans) dropped_spans = len_diff + self._span_recorder.dropped_spans @@ -1078,6 +1083,9 @@ def finish( if dropped_spans > 0: event["_dropped_spans"] = dropped_spans + if has_gen_ai_span: + event["_has_gen_ai_span"] = True + if self._profile is not None and self._profile.valid(): event["profile"] = self._profile self._profile = None diff --git a/tests/integrations/anthropic/test_anthropic.py b/tests/integrations/anthropic/test_anthropic.py index f38443bc94..865013f0b4 100644 --- a/tests/integrations/anthropic/test_anthropic.py +++ b/tests/integrations/anthropic/test_anthropic.py @@ -97,7 +97,6 @@ def test_nonstreaming_create_message( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -172,7 +171,6 @@ async def test_nonstreaming_create_message_async( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = 
capture_items("transaction", "span") client = AsyncAnthropic(api_key="z") @@ -289,7 +287,6 @@ def test_streaming_create_message( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -398,7 +395,6 @@ def test_streaming_create_message_close( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -502,7 +498,6 @@ def test_streaming_create_message_api_error( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -619,7 +614,6 @@ def test_stream_messages( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -729,7 +723,6 @@ def test_stream_messages_close( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -838,7 +831,6 @@ def test_stream_messages_api_error( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -961,7 +953,6 @@ async def test_streaming_create_message_async( traces_sample_rate=1.0, default_integrations=False, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1073,7 +1064,6 @@ async def test_streaming_create_message_async_close( 
integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1180,7 +1170,6 @@ async def test_streaming_create_message_async_api_error( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1301,7 +1290,6 @@ async def test_stream_message_async( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1412,7 +1400,6 @@ async def test_stream_messages_async_api_error( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1523,7 +1510,6 @@ async def test_stream_messages_async_close( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1680,7 +1666,6 @@ def test_streaming_create_message_with_input_json_delta( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1830,7 +1815,6 @@ def test_stream_messages_with_input_json_delta( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1988,7 +1972,6 @@ async def test_streaming_create_message_with_input_json_delta_async( 
integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2146,7 +2129,6 @@ async def test_stream_message_with_input_json_delta_async( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2208,7 +2190,6 @@ def test_exception_message_create(sentry_init, capture_items): sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event", "transaction") @@ -2234,7 +2215,6 @@ def test_span_status_error(sentry_init, capture_items): sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event", "span") @@ -2264,7 +2244,6 @@ async def test_span_status_error_async(sentry_init, capture_items): sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event", "span") @@ -2294,7 +2273,6 @@ async def test_exception_message_create_async(sentry_init, capture_items): sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event", "transaction") @@ -2320,7 +2298,6 @@ def test_span_origin(sentry_init, capture_items): sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2351,7 +2328,6 @@ async def test_span_origin_async(sentry_init, capture_items): sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = 
capture_items("transaction", "span") @@ -2415,7 +2391,6 @@ def test_set_output_data_with_input_json_delta(sentry_init): integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) with start_transaction(name="test"): @@ -2466,7 +2441,6 @@ def test_anthropic_message_role_mapping( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2513,7 +2487,6 @@ def test_anthropic_message_truncation(sentry_init, capture_items): integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2564,7 +2537,6 @@ async def test_anthropic_message_truncation_async(sentry_init, capture_items): integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2625,7 +2597,6 @@ def test_nonstreaming_create_message_with_system_prompt( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -2712,7 +2683,6 @@ async def test_nonstreaming_create_message_with_system_prompt_async( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = AsyncAnthropic(api_key="z") @@ -2842,7 +2812,6 @@ def test_streaming_create_message_with_system_prompt( integrations=[AnthropicIntegration(include_prompts=include_prompts)], 
traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2973,7 +2942,6 @@ def test_stream_messages_with_system_prompt( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -3106,7 +3074,6 @@ async def test_stream_message_with_system_prompt_async( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -3239,7 +3206,6 @@ async def test_streaming_create_message_with_system_prompt_async( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -3315,7 +3281,6 @@ def test_system_prompt_with_complex_structure(sentry_init, capture_items): integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -3569,7 +3534,6 @@ def test_message_with_base64_image(sentry_init, capture_items): integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -3620,7 +3584,6 @@ def test_message_with_url_image(sentry_init, capture_items): integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = 
Anthropic(api_key="z") @@ -3664,7 +3627,6 @@ def test_message_with_file_image(sentry_init, capture_items): integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -3709,7 +3671,6 @@ def test_message_with_base64_pdf(sentry_init, capture_items): integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -3754,7 +3715,6 @@ def test_message_with_url_pdf(sentry_init, capture_items): integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -3798,7 +3758,6 @@ def test_message_with_file_document(sentry_init, capture_items): integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -3843,7 +3802,6 @@ def test_message_with_mixed_content(sentry_init, capture_items): integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -3926,7 +3884,6 @@ def test_message_with_multiple_images_different_formats(sentry_init, capture_ite integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -4001,7 +3958,6 @@ def 
test_binary_content_not_stored_when_pii_disabled(sentry_init, capture_items) integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=False, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -4040,7 +3996,6 @@ def test_binary_content_not_stored_when_prompts_disabled(sentry_init, capture_it integrations=[AnthropicIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -4078,7 +4033,6 @@ def test_cache_tokens_nonstreaming(sentry_init, capture_items): sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -4130,7 +4084,6 @@ def test_input_tokens_include_cache_write_nonstreaming(sentry_init, capture_item sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -4182,7 +4135,6 @@ def test_input_tokens_include_cache_read_nonstreaming(sentry_init, capture_items sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -4263,7 +4215,6 @@ def test_input_tokens_include_cache_read_streaming( sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -4333,7 +4284,6 @@ def test_stream_messages_input_tokens_include_cache_read_streaming( sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = 
capture_items("transaction", "span") @@ -4370,7 +4320,6 @@ def test_input_tokens_unchanged_without_caching(sentry_init, capture_items): sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -4442,7 +4391,6 @@ def test_cache_tokens_streaming( sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -4506,7 +4454,6 @@ def test_stream_messages_cache_tokens( sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") diff --git a/tests/integrations/google_genai/test_google_genai.py b/tests/integrations/google_genai/test_google_genai.py index 62c0530c31..3974041314 100644 --- a/tests/integrations/google_genai/test_google_genai.py +++ b/tests/integrations/google_genai/test_google_genai.py @@ -130,7 +130,6 @@ def test_nonstreaming_generate_content( integrations=[GoogleGenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -220,7 +219,6 @@ def test_generate_content_with_system_instruction( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -264,7 +262,6 @@ def test_generate_content_with_tools(sentry_init, capture_items, mock_genai_clie sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -347,7 +344,6 @@ def test_tool_execution(sentry_init, capture_items): integrations=[GoogleGenAIIntegration(include_prompts=True)], 
traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -384,7 +380,6 @@ def test_error_handling(sentry_init, capture_items, mock_genai_client): sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event", "transaction") @@ -414,7 +409,6 @@ def test_streaming_generate_content(sentry_init, capture_items, mock_genai_clien integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -531,7 +525,6 @@ def test_span_origin(sentry_init, capture_items, mock_genai_client): sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -559,7 +552,6 @@ def test_response_without_usage_metadata(sentry_init, capture_items, mock_genai_ sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -601,7 +593,6 @@ def test_multiple_candidates(sentry_init, capture_items, mock_genai_client): integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -666,7 +657,6 @@ def test_all_configuration_parameters(sentry_init, capture_items, mock_genai_cli sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -706,7 +696,6 @@ def test_empty_response(sentry_init, capture_items, mock_genai_client): sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -738,7 +727,6 @@ def 
test_response_with_different_id_fields( sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -781,7 +769,6 @@ def test_tool_with_async_function(sentry_init): sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) # Create an async tool function @@ -804,7 +791,6 @@ def test_contents_as_none(sentry_init, capture_items, mock_genai_client): integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -831,7 +817,6 @@ def test_tool_calls_extraction(sentry_init, capture_items, mock_genai_client): sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -918,7 +903,6 @@ def test_google_genai_message_truncation(sentry_init, capture_items, mock_genai_ integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -994,7 +978,6 @@ def test_embed_content( integrations=[GoogleGenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1056,7 +1039,6 @@ def test_embed_content_string_input(sentry_init, capture_items, mock_genai_clien integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1103,7 +1085,6 @@ def test_embed_content_error_handling(sentry_init, capture_items, mock_genai_cli sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, - 
_experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "event") @@ -1135,7 +1116,6 @@ def test_embed_content_without_statistics( sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1175,7 +1155,6 @@ def test_embed_content_span_origin(sentry_init, capture_items, mock_genai_client sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1216,7 +1195,6 @@ async def test_async_embed_content( integrations=[GoogleGenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1281,7 +1259,6 @@ async def test_async_embed_content_string_input( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1331,7 +1308,6 @@ async def test_async_embed_content_error_handling( sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "event") @@ -1364,7 +1340,6 @@ async def test_async_embed_content_without_statistics( sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1407,7 +1382,6 @@ async def test_async_embed_content_span_origin( sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1439,7 +1413,6 @@ def test_generate_content_with_content_object( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, 
send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1476,7 +1449,6 @@ def test_generate_content_with_dict_format( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1509,7 +1481,6 @@ def test_generate_content_with_file_data(sentry_init, capture_items, mock_genai_ integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1559,7 +1530,6 @@ def test_generate_content_with_inline_data( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1605,7 +1575,6 @@ def test_generate_content_with_function_response( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1660,7 +1629,6 @@ def test_generate_content_with_mixed_string_and_content( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1704,7 +1672,6 @@ def test_generate_content_with_part_object_directly( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1743,7 +1710,6 @@ def test_generate_content_with_list_of_dicts( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1780,7 +1746,6 @@ def test_generate_content_with_dict_inline_data( 
integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1825,7 +1790,6 @@ def test_generate_content_without_parts_property_inline_data( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1869,7 +1833,6 @@ def test_generate_content_without_parts_property_inline_data_and_binary_data_wit integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") diff --git a/tests/integrations/huggingface_hub/test_huggingface_hub.py b/tests/integrations/huggingface_hub/test_huggingface_hub.py index eaac8c1ab1..031627906a 100644 --- a/tests/integrations/huggingface_hub/test_huggingface_hub.py +++ b/tests/integrations/huggingface_hub/test_huggingface_hub.py @@ -480,7 +480,6 @@ def test_text_generation( traces_sample_rate=1.0, send_default_pii=send_default_pii, integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -556,7 +555,6 @@ def test_text_generation_streaming( traces_sample_rate=1.0, send_default_pii=send_default_pii, integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -633,7 +631,6 @@ def test_chat_completion( traces_sample_rate=1.0, send_default_pii=send_default_pii, integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -712,7 +709,6 @@ def test_chat_completion_streaming( traces_sample_rate=1.0, send_default_pii=send_default_pii, 
integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -885,7 +881,6 @@ def test_chat_completion_with_tools( traces_sample_rate=1.0, send_default_pii=send_default_pii, integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -981,7 +976,6 @@ def test_chat_completion_streaming_with_tools( traces_sample_rate=1.0, send_default_pii=send_default_pii, integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index 243a059432..3c1d9bef54 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -108,7 +108,6 @@ def test_langchain_text_completion( ], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -217,7 +216,6 @@ def test_langchain_create_agent( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -338,7 +336,6 @@ def test_tool_execution_span( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -531,7 +528,6 @@ def test_langchain_openai_tools_agent( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -869,7 +865,6 @@ def test_langchain_error(sentry_init, capture_items): integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - 
_experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event", "transaction", "span") @@ -908,7 +903,6 @@ def test_span_status_error(sentry_init, capture_items): sentry_init( integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event", "transaction", "span") @@ -1036,7 +1030,6 @@ def test_langchain_callback_manager(sentry_init): sentry_init( integrations=[LangchainIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) local_manager = BaseCallbackManager(handlers=[]) @@ -1069,7 +1062,6 @@ def test_langchain_callback_manager_with_sentry_callback(sentry_init): sentry_init( integrations=[LangchainIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) sentry_callback = SentryLangchainCallback(0, False) local_manager = BaseCallbackManager(handlers=[sentry_callback]) @@ -1102,7 +1094,6 @@ def test_langchain_callback_list(sentry_init): sentry_init( integrations=[LangchainIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) local_callbacks = [] @@ -1135,7 +1126,6 @@ def test_langchain_callback_list_existing_callback(sentry_init): sentry_init( integrations=[LangchainIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) sentry_callback = SentryLangchainCallback(0, False) local_callbacks = [sentry_callback] @@ -1173,7 +1163,6 @@ def test_langchain_message_role_mapping(sentry_init, capture_items): integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1311,7 +1300,6 @@ def test_langchain_message_truncation(sentry_init, capture_items): integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = 
capture_items("transaction", "span") @@ -1404,7 +1392,6 @@ def test_langchain_embeddings_sync( integrations=[LangchainIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1483,7 +1470,6 @@ def test_langchain_embeddings_embed_query( integrations=[LangchainIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1558,7 +1544,6 @@ async def test_langchain_embeddings_async( integrations=[LangchainIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1631,7 +1616,6 @@ async def test_langchain_embeddings_aembed_query(sentry_init, capture_items): integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1693,7 +1677,6 @@ def test_langchain_embeddings_no_model_name(sentry_init, capture_items): sentry_init( integrations=[LangchainIntegration(include_prompts=False)], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1779,7 +1762,6 @@ def test_langchain_embeddings_multiple_providers(sentry_init, capture_items): integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1837,7 +1819,6 @@ def test_langchain_embeddings_error_handling(sentry_init, capture_items): integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - 
_experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1878,7 +1859,6 @@ def test_langchain_embeddings_multiple_calls(sentry_init, capture_items): integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1942,7 +1922,6 @@ def test_langchain_embeddings_span_hierarchy(sentry_init, capture_items): integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1996,7 +1975,6 @@ def test_langchain_embeddings_with_list_and_string_inputs(sentry_init, capture_i integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2061,7 +2039,6 @@ def test_langchain_response_model_extraction( integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2367,7 +2344,6 @@ def test_langchain_ai_system_detection( sentry_init( integrations=[LangchainIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") diff --git a/tests/integrations/langgraph/test_langgraph.py b/tests/integrations/langgraph/test_langgraph.py index b70889548f..e1a3baa0a8 100644 --- a/tests/integrations/langgraph/test_langgraph.py +++ b/tests/integrations/langgraph/test_langgraph.py @@ -154,7 +154,6 @@ def test_state_graph_compile( integrations=[LanggraphIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") 
graph = MockStateGraph() @@ -210,7 +209,6 @@ def test_pregel_invoke(sentry_init, capture_items, send_default_pii, include_pro integrations=[LanggraphIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -313,7 +311,6 @@ def test_pregel_ainvoke(sentry_init, capture_items, send_default_pii, include_pr integrations=[LanggraphIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") test_state = {"messages": [MockMessage("What's the weather like?", name="user")]} @@ -394,7 +391,6 @@ def test_pregel_invoke_error(sentry_init, capture_items): integrations=[LanggraphIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") test_state = {"messages": [MockMessage("This will fail")]} @@ -425,7 +421,6 @@ def test_pregel_ainvoke_error(sentry_init, capture_items): integrations=[LanggraphIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") test_state = {"messages": [MockMessage("This will fail async")]} @@ -460,7 +455,6 @@ def test_span_origin(sentry_init, capture_items): sentry_init( integrations=[LanggraphIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -492,7 +486,6 @@ def test_pregel_invoke_with_different_graph_names( integrations=[LanggraphIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -536,7 +529,6 @@ def test_pregel_invoke_span_includes_usage_data(sentry_init, 
capture_items): sentry_init( integrations=[LanggraphIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -613,7 +605,6 @@ def test_pregel_ainvoke_span_includes_usage_data(sentry_init, capture_items): sentry_init( integrations=[LanggraphIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -693,7 +684,6 @@ def test_pregel_invoke_multiple_llm_calls_aggregate_usage(sentry_init, capture_i sentry_init( integrations=[LanggraphIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -775,7 +765,6 @@ def test_pregel_ainvoke_multiple_llm_calls_aggregate_usage(sentry_init, capture_ sentry_init( integrations=[LanggraphIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -860,7 +849,6 @@ def test_pregel_invoke_span_includes_response_model(sentry_init, capture_items): sentry_init( integrations=[LanggraphIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -933,7 +921,6 @@ def test_pregel_ainvoke_span_includes_response_model(sentry_init, capture_items) sentry_init( integrations=[LanggraphIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1009,7 +996,6 @@ def test_pregel_invoke_span_uses_last_response_model(sentry_init, capture_items) sentry_init( integrations=[LanggraphIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1094,7 +1080,6 @@ def test_pregel_ainvoke_span_uses_last_response_model(sentry_init, capture_items sentry_init( integrations=[LanggraphIntegration()], traces_sample_rate=1.0, - 
_experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1227,7 +1212,6 @@ def test_extraction_functions_complex_scenario(sentry_init, capture_items): integrations=[LanggraphIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1303,7 +1287,6 @@ def test_langgraph_message_role_mapping(sentry_init, capture_items): integrations=[LanggraphIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1378,7 +1361,6 @@ def test_langgraph_message_truncation(sentry_init, capture_items): integrations=[LanggraphIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") diff --git a/tests/integrations/litellm/test_litellm.py b/tests/integrations/litellm/test_litellm.py index b9365e7008..90807744e7 100644 --- a/tests/integrations/litellm/test_litellm.py +++ b/tests/integrations/litellm/test_litellm.py @@ -152,7 +152,6 @@ def test_nonstreaming_chat_completion( integrations=[LiteLLMIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -234,7 +233,6 @@ async def test_async_nonstreaming_chat_completion( integrations=[LiteLLMIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -318,7 +316,6 @@ def test_streaming_chat_completion( integrations=[LiteLLMIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) 
items = capture_items("span") @@ -389,7 +386,6 @@ async def test_async_streaming_chat_completion( integrations=[LiteLLMIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -456,7 +452,6 @@ def test_embeddings_create( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -526,7 +521,6 @@ async def test_async_embeddings_create( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -591,7 +585,6 @@ def test_embeddings_create_with_list_input( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -654,7 +647,6 @@ async def test_async_embeddings_create_with_list_input( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -717,7 +709,6 @@ def test_embeddings_no_pii( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=False, # PII disabled - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -774,7 +765,6 @@ async def test_async_embeddings_no_pii( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=False, # PII disabled - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -825,7 +815,6 @@ def test_exception_handling( sentry_init( integrations=[LiteLLMIntegration()], 
traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event") @@ -864,7 +853,6 @@ async def test_async_exception_handling( sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event") @@ -906,7 +894,6 @@ def test_span_origin( sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -954,7 +941,6 @@ def test_multiple_providers( sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction") @@ -1050,7 +1036,6 @@ async def test_async_multiple_providers( sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1147,7 +1132,6 @@ def test_additional_parameters( sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1207,7 +1191,6 @@ async def test_async_additional_parameters( sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1267,7 +1250,6 @@ def test_no_integration( """Test that when integration is not enabled, callbacks don't break.""" sentry_init( traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1314,7 +1296,6 @@ async def test_async_no_integration( """Test that when integration is not enabled, callbacks don't break.""" sentry_init( traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1357,7 +1338,6 @@ def test_response_without_usage(sentry_init, 
capture_items): sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1399,7 +1379,6 @@ def test_integration_setup(sentry_init): sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) # Check that callbacks are registered @@ -1414,7 +1393,6 @@ def test_litellm_message_truncation(sentry_init, capture_items): integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1481,7 +1459,6 @@ def test_binary_content_encoding_image_url( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1561,7 +1538,6 @@ async def test_async_binary_content_encoding_image_url( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1642,7 +1618,6 @@ def test_binary_content_encoding_mixed_content( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1711,7 +1686,6 @@ async def test_async_binary_content_encoding_mixed_content( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1781,7 +1755,6 @@ def test_binary_content_encoding_uri_type( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = 
capture_items("transaction", "span") @@ -1855,7 +1828,6 @@ async def test_async_binary_content_encoding_uri_type( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index 8263dedc70..4b9d629d96 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -138,7 +138,6 @@ def test_nonstreaming_chat_completion_no_prompts( integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -234,7 +233,6 @@ def test_nonstreaming_chat_completion(sentry_init, capture_items, messages, requ integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -314,7 +312,6 @@ async def test_nonstreaming_chat_completion_async_no_prompts( integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -410,7 +407,6 @@ async def test_nonstreaming_chat_completion_async( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -506,7 +502,6 @@ def test_streaming_chat_completion_no_prompts( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -626,7 +621,6 @@ def test_streaming_chat_completion_with_usage_in_stream( integrations=[OpenAIIntegration(include_prompts=False)], traces_sample_rate=1.0, 
send_default_pii=False, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -707,7 +701,6 @@ def test_streaming_chat_completion_empty_content_preserves_token_usage( integrations=[OpenAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=False, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -771,7 +764,6 @@ async def test_streaming_chat_completion_empty_content_preserves_token_usage_asy integrations=[OpenAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=False, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -837,7 +829,6 @@ async def test_streaming_chat_completion_async_with_usage_in_stream( integrations=[OpenAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=False, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -966,7 +957,6 @@ def test_streaming_chat_completion( ], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1119,7 +1109,6 @@ async def test_streaming_chat_completion_async_no_prompts( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1291,7 +1280,6 @@ async def test_streaming_chat_completion_async( ], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1426,7 +1414,6 @@ def test_bad_chat_completion(sentry_init, capture_items): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event") @@ -1448,7 +1435,6 @@ def test_span_status_error(sentry_init, capture_items): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event", 
"transaction", "span") @@ -1477,7 +1463,6 @@ async def test_bad_chat_completion_async(sentry_init, capture_items): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event") @@ -1509,7 +1494,6 @@ def test_embeddings_create_no_pii( integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1592,7 +1576,6 @@ def test_embeddings_create(sentry_init, capture_items, input, request): integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1664,7 +1647,6 @@ async def test_embeddings_create_async_no_pii( integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1748,7 +1730,6 @@ async def test_embeddings_create_async(sentry_init, capture_items, input, reques integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1817,7 +1798,6 @@ def test_embeddings_create_raises_error( integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event") @@ -1846,7 +1826,6 @@ async def test_embeddings_create_raises_error_async( integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event") @@ -1867,7 +1846,6 @@ def test_span_origin_nonstreaming_chat(sentry_init, capture_items): 
sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1891,7 +1869,6 @@ async def test_span_origin_nonstreaming_chat_async(sentry_init, capture_items): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1914,7 +1891,6 @@ def test_span_origin_streaming_chat(sentry_init, capture_items): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1978,7 +1954,6 @@ async def test_span_origin_streaming_chat_async( sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2045,7 +2020,6 @@ def test_span_origin_embeddings(sentry_init, capture_items): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2077,7 +2051,6 @@ async def test_span_origin_embeddings_async(sentry_init, capture_items): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2471,7 +2444,6 @@ def test_ai_client_span_responses_api_no_pii(sentry_init, capture_items): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -2594,7 +2566,6 @@ def test_ai_client_span_responses_api( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -2805,7 +2776,6 @@ def test_error_in_responses_api(sentry_init, capture_items): 
integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event", "transaction", "span") @@ -2912,7 +2882,6 @@ async def test_ai_client_span_responses_async_api( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -3198,7 +3167,6 @@ async def test_ai_client_span_streaming_responses_async_api( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -3424,7 +3392,6 @@ async def test_error_in_responses_async_api(sentry_init, capture_items): integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event", "transaction", "span") @@ -3552,7 +3519,6 @@ def test_streaming_responses_api( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -3629,7 +3595,6 @@ async def test_streaming_responses_api_async( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -3693,7 +3658,6 @@ def test_empty_tools_in_chat_completion(sentry_init, capture_items, tools): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -3736,7 +3700,6 @@ def test_openai_message_role_mapping( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -3767,7 +3730,6 @@ def test_openai_message_truncation(sentry_init, capture_items): 
integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -3817,7 +3779,6 @@ def test_streaming_chat_completion_ttft( sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -3896,7 +3857,6 @@ async def test_streaming_chat_completion_ttft_async( sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -3973,7 +3933,6 @@ def test_streaming_responses_api_ttft( sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -4023,7 +3982,6 @@ async def test_streaming_responses_api_ttft_async( sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py index ffcf8685a7..bde222274c 100644 --- a/tests/integrations/openai_agents/test_openai_agents.py +++ b/tests/integrations/openai_agents/test_openai_agents.py @@ -182,7 +182,6 @@ async def test_agent_invocation_span_no_pii( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=False, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -338,7 +337,6 @@ async def test_agent_invocation_span( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -521,7 +519,6 @@ async def test_client_span_custom_model( sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, - 
_experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -569,7 +566,6 @@ def test_agent_invocation_span_sync_no_pii( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=False, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -719,7 +715,6 @@ def test_agent_invocation_span_sync( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -960,7 +955,6 @@ async def test_handoff_span(sentry_init, capture_items, get_model_response): sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1090,7 +1084,6 @@ async def test_max_turns_before_handoff_span( sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1189,7 +1182,6 @@ def simple_test_tool(message: str) -> str: integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1416,7 +1408,6 @@ async def test_hosted_mcp_tool_propagation_header_streamed( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, release="d08ebdb9309e1b004c6f52202de58a09c2268e42", - _experiments={"gen_ai_as_v2_spans": True}, ) request_headers = {} @@ -1579,7 +1570,6 @@ async def test_hosted_mcp_tool_propagation_headers( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, release="d08ebdb9309e1b004c6f52202de58a09c2268e42", - _experiments={"gen_ai_as_v2_spans": True}, ) response = get_model_response(EXAMPLE_RESPONSE, serialize_pydantic=True) @@ -1678,7 +1668,6 @@ def simple_test_tool(message: str) -> str: integrations=[OpenAIAgentsIntegration()], 
traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -1725,7 +1714,6 @@ async def test_error_handling(sentry_init, capture_items, test_agent): LoggingIntegration(event_level=logging.CRITICAL), ], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event", "span", "transaction") @@ -1787,7 +1775,6 @@ async def test_error_captures_input_data(sentry_init, capture_items, test_agent) ], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event", "span") @@ -1830,7 +1817,6 @@ async def test_span_status_error(sentry_init, capture_items, test_agent): LoggingIntegration(event_level=logging.CRITICAL), ], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event", "transaction", "span") @@ -1944,7 +1930,6 @@ async def test_mcp_tool_execution_spans( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -2075,7 +2060,6 @@ async def test_mcp_tool_execution_with_error( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -2200,7 +2184,6 @@ async def test_mcp_tool_execution_without_pii( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=False, # PII disabled - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -2258,7 +2241,6 @@ async def test_multiple_agents_asyncio( sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -2302,7 +2284,6 @@ def test_openai_agents_message_role_mapping( 
integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) get_response_kwargs = {"input": [test_message]} @@ -2402,7 +2383,6 @@ def failing_tool(message: str) -> str: integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -2500,7 +2480,6 @@ async def test_invoke_agent_span_includes_usage_data( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -2594,7 +2573,6 @@ async def test_ai_client_span_includes_response_model( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -2683,7 +2661,6 @@ async def test_ai_client_span_response_model_with_chat_completions( sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -2806,7 +2783,6 @@ def calculator(a: int, b: int) -> int: integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -2895,7 +2871,6 @@ async def test_invoke_agent_span_includes_response_model( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -3030,7 +3005,6 @@ def calculator(a: int, b: int) -> int: integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -3073,7 +3047,6 @@ def 
test_openai_agents_message_truncation(sentry_init, capture_items): integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) test_messages = [ @@ -3120,7 +3093,6 @@ async def test_streaming_span_update_captures_response_data( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) # Create a mock streaming response object (similar to what we'd get from ResponseCompletedEvent) @@ -3186,7 +3158,6 @@ async def test_streaming_ttft_on_chat_span( sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) request_headers = {} @@ -3341,7 +3312,6 @@ async def test_conversation_id_on_all_spans( sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -3480,7 +3450,6 @@ def simple_tool(message: str) -> str: sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -3544,7 +3513,6 @@ async def test_no_conversation_id_when_not_provided( sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") diff --git a/tests/integrations/pydantic_ai/test_pydantic_ai.py b/tests/integrations/pydantic_ai/test_pydantic_ai.py index 571d82279f..cfb1ca09ca 100644 --- a/tests/integrations/pydantic_ai/test_pydantic_ai.py +++ b/tests/integrations/pydantic_ai/test_pydantic_ai.py @@ -61,7 +61,6 @@ async def test_agent_run_async(sentry_init, capture_items, get_test_agent): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", 
"span") @@ -103,7 +102,6 @@ async def test_agent_run_async_model_error(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event", "transaction", "span") @@ -137,7 +135,6 @@ async def test_agent_run_async_usage_data(sentry_init, capture_items, get_test_a integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -182,7 +179,6 @@ def test_agent_run_sync(sentry_init, capture_items, get_test_agent): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -215,7 +211,6 @@ def test_agent_run_sync_model_error(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event", "transaction", "span") @@ -249,7 +244,6 @@ async def test_agent_run_stream(sentry_init, capture_items, get_test_agent): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -294,7 +288,6 @@ async def test_agent_run_stream_events(sentry_init, capture_items, get_test_agen integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -329,7 +322,6 @@ async def test_agent_with_tools(sentry_init, capture_items, get_test_agent): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) test_agent = get_test_agent() @@ -395,7 +387,6 @@ async def test_agent_with_tool_model_retry( ], traces_sample_rate=1.0, 
send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) retries = 0 @@ -479,7 +470,6 @@ async def test_agent_with_tool_validation_error( ], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) test_agent = get_test_agent() @@ -544,7 +534,6 @@ async def test_agent_with_tools_streaming(sentry_init, capture_items, get_test_a integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) test_agent = get_test_agent() @@ -594,7 +583,6 @@ async def test_model_settings(sentry_init, capture_items, get_test_agent_with_se sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -643,7 +631,6 @@ async def test_system_prompt_attribute( integrations=[PydanticAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -689,7 +676,6 @@ async def test_error_handling(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -714,7 +700,6 @@ async def test_without_pii(sentry_init, capture_items, get_test_agent): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=False, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -744,7 +729,6 @@ async def test_without_pii_tools(sentry_init, capture_items, get_test_agent): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=False, - _experiments={"gen_ai_as_v2_spans": True}, ) test_agent = get_test_agent() @@ -781,7 +765,6 @@ async def test_multiple_agents_concurrent(sentry_init, capture_items, get_test_a sentry_init( 
integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -817,7 +800,6 @@ async def test_message_history(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -867,7 +849,6 @@ async def test_gen_ai_system(sentry_init, capture_items, get_test_agent): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -898,7 +879,6 @@ async def test_include_prompts_false(sentry_init, capture_items, get_test_agent) integrations=[PydanticAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=True, # Even with PII enabled, prompts should not be captured - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -928,7 +908,6 @@ async def test_include_prompts_true(sentry_init, capture_items, get_test_agent): integrations=[PydanticAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -960,7 +939,6 @@ async def test_include_prompts_false_with_tools( integrations=[PydanticAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) test_agent = get_test_agent() @@ -998,7 +976,6 @@ async def test_include_prompts_requires_pii(sentry_init, capture_items, get_test integrations=[PydanticAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=False, # PII disabled - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1090,7 +1067,6 @@ async def mock_map_tool_result_part(part): 
integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1160,7 +1136,6 @@ async def test_context_cleanup_after_run(sentry_init, get_test_agent): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) # Verify context is not set before run @@ -1184,7 +1159,6 @@ def test_context_cleanup_after_run_sync(sentry_init, get_test_agent): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) # Verify context is not set before run @@ -1209,7 +1183,6 @@ async def test_context_cleanup_after_streaming(sentry_init, get_test_agent): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) # Verify context is not set before run @@ -1236,7 +1209,6 @@ async def test_context_cleanup_on_error(sentry_init, get_test_agent): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) test_agent = get_test_agent() @@ -1271,7 +1243,6 @@ async def test_context_isolation_concurrent_agents(sentry_init, get_test_agent): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) # Create a second agent @@ -1327,7 +1298,6 @@ async def test_invoke_agent_with_list_user_prompt(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1378,7 +1348,6 @@ async def test_invoke_agent_with_instructions( integrations=[PydanticAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = 
capture_items("transaction", "span") @@ -1418,7 +1387,6 @@ async def test_model_name_extraction_with_callable(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) # Test the utility function directly @@ -1445,7 +1413,6 @@ async def test_model_name_extraction_fallback_to_str(sentry_init, capture_items) sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) # Test the utility function directly @@ -1474,7 +1441,6 @@ async def test_model_settings_object_style(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -1510,7 +1476,6 @@ async def test_usage_data_partial(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1544,7 +1509,6 @@ async def test_agent_data_from_scope(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1567,7 +1531,6 @@ async def test_available_tools_without_description( sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) test_agent = get_test_agent() @@ -1602,7 +1565,6 @@ async def test_output_with_tool_calls(sentry_init, capture_items, get_test_agent integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) test_agent = get_test_agent() @@ -1648,7 +1610,6 @@ async def test_message_formatting_with_different_parts(sentry_init, capture_item 
integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1697,7 +1658,6 @@ async def test_update_invoke_agent_span_with_none_output(sentry_init, capture_it integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -1725,7 +1685,6 @@ async def test_update_ai_client_span_with_none_response(sentry_init, capture_ite sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -1751,7 +1710,6 @@ async def test_agent_without_name(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1777,7 +1735,6 @@ async def test_model_response_without_parts(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -1808,7 +1765,6 @@ async def test_input_messages_error_handling(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -1838,7 +1794,6 @@ async def test_available_tools_error_handling(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -1868,7 +1823,6 @@ async def 
test_set_usage_data_with_none_usage(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -1895,7 +1849,6 @@ async def test_set_usage_data_with_partial_fields(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -1937,7 +1890,6 @@ def test_tool(x: int) -> int: integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1967,7 +1919,6 @@ async def test_message_parts_with_list_content(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2004,7 +1955,6 @@ async def test_output_data_with_text_and_tool_calls(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2044,7 +1994,6 @@ async def test_output_data_error_handling(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2077,7 +2026,6 @@ async def test_message_with_system_prompt_part(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", 
name="test") as transaction: @@ -2113,7 +2061,6 @@ async def test_message_with_instructions(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2148,7 +2095,6 @@ async def test_set_input_messages_without_prompts(sentry_init, capture_items): integrations=[PydanticAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2177,7 +2123,6 @@ async def test_set_output_data_without_prompts(sentry_init, capture_items): integrations=[PydanticAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2205,7 +2150,6 @@ async def test_get_model_name_with_exception_in_callable(sentry_init, capture_it sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) # Create model with callable name that raises exception @@ -2229,7 +2173,6 @@ async def test_get_model_name_with_string_model(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) # Pass a string as model @@ -2249,7 +2192,6 @@ async def test_get_model_name_with_none(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) # Pass None @@ -2271,7 +2213,6 @@ async def test_set_model_data_with_system(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) with 
sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2303,7 +2244,6 @@ async def test_set_model_data_from_agent_scope(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2337,7 +2277,6 @@ async def test_set_model_data_with_none_settings_values(sentry_init, capture_ite sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2370,7 +2309,6 @@ async def test_should_send_prompts_without_pii(sentry_init, capture_items): integrations=[PydanticAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=False, # PII disabled - _experiments={"gen_ai_as_v2_spans": True}, ) # Should return False @@ -2389,7 +2327,6 @@ async def test_set_agent_data_without_agent(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2416,7 +2353,6 @@ async def test_set_agent_data_from_scope(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2449,7 +2385,6 @@ async def test_set_agent_data_without_name(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2480,7 +2415,6 @@ async def test_set_available_tools_without_toolset(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], 
traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2511,7 +2445,6 @@ async def test_set_available_tools_with_schema(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2551,7 +2484,6 @@ async def test_execute_tool_span_creation(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2578,7 +2510,6 @@ async def test_execute_tool_span_with_mcp_type(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2606,7 +2537,6 @@ async def test_execute_tool_span_without_prompts(sentry_init, capture_items): integrations=[PydanticAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2631,7 +2561,6 @@ async def test_execute_tool_span_with_none_args(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2655,7 +2584,6 @@ async def test_update_execute_tool_span_with_none_span(sentry_init, capture_item sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) # Update with None span - should not raise @@ -2680,7 +2608,6 @@ async def 
test_update_execute_tool_span_with_none_result(sentry_init, capture_it integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2704,7 +2631,6 @@ async def test_tool_execution_without_span_context(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) # Create a simple agent with no tools (won't have function_toolset) @@ -2736,7 +2662,6 @@ async def test_invoke_agent_span_with_callable_instruction(sentry_init, capture_ integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2770,7 +2695,6 @@ async def test_invoke_agent_span_with_string_instructions(sentry_init, capture_i integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2799,7 +2723,6 @@ async def test_ai_client_span_with_streaming_flag(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2827,7 +2750,6 @@ async def test_ai_client_span_gets_agent_from_scope(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2876,7 +2798,6 @@ async def test_binary_content_encoding_image(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - 
_experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2907,7 +2828,6 @@ async def test_binary_content_encoding_mixed_content(sentry_init, capture_items) integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2951,7 +2871,6 @@ async def test_binary_content_in_agent_run(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2978,7 +2897,6 @@ async def test_set_usage_data_with_cache_tokens(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -3050,7 +2968,6 @@ def test_image_url_base64_content_in_span( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -3121,7 +3038,6 @@ async def test_invoke_agent_image_url( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) agent = Agent("test", name="test_image_url_agent") @@ -3169,7 +3085,6 @@ def multiply_numbers(a: int, b: int) -> int: integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") From 7bd12aef87cd69e975cb6c383f84d715f07aa1d7 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Mon, 20 Apr 2026 18:24:43 +0200 Subject: [PATCH 39/84] add constant again --- sentry_sdk/_types.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sentry_sdk/_types.py b/sentry_sdk/_types.py index fbb9a166b8..814b90c440 100644 --- 
a/sentry_sdk/_types.py +++ b/sentry_sdk/_types.py @@ -12,6 +12,7 @@ SENSITIVE_DATA_SUBSTITUTE = "[Filtered]" +BLOB_DATA_SUBSTITUTE = "[Blob substitute]" class AnnotatedValue: From ef843a0569c2c09381248ebc9b8901ca0dcbe8d8 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Mon, 20 Apr 2026 18:39:10 +0200 Subject: [PATCH 40/84] add name fallback --- sentry_sdk/client.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sentry_sdk/client.py b/sentry_sdk/client.py index fd102e0679..316bc8877a 100644 --- a/sentry_sdk/client.py +++ b/sentry_sdk/client.py @@ -175,6 +175,10 @@ def _serialized_v1_span_to_serialized_v2_span( if "description" in span: res["name"] = span["description"] + elif ( + "op" in span + ): # fallback based on observed downstream fallback for transactions + res["name"] = span["op"] if "start_timestamp" in span: start_timestamp = None From 4e3e2d01bde97811466eca1d206f56066c50be8d Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Mon, 20 Apr 2026 19:11:42 +0200 Subject: [PATCH 41/84] remove remaining experimental option references --- tests/tracing/test_decorator.py | 3 --- tests/tracing/test_misc.py | 1 - 2 files changed, 4 deletions(-) diff --git a/tests/tracing/test_decorator.py b/tests/tracing/test_decorator.py index d370b4bbc9..a71ca5588f 100644 --- a/tests/tracing/test_decorator.py +++ b/tests/tracing/test_decorator.py @@ -124,7 +124,6 @@ async def _some_function_traced(a, b, c): def test_span_templates_ai_dicts(sentry_init, capture_items): sentry_init( traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -246,7 +245,6 @@ def my_agent(): def test_span_templates_ai_objects(sentry_init, capture_items): sentry_init( traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -374,7 +372,6 @@ def test_span_templates_ai_pii(sentry_init, capture_items, send_default_pii): sentry_init( traces_sample_rate=1.0, 
send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") diff --git a/tests/tracing/test_misc.py b/tests/tracing/test_misc.py index 4209a02b4b..0e35668b48 100644 --- a/tests/tracing/test_misc.py +++ b/tests/tracing/test_misc.py @@ -652,7 +652,6 @@ def test_conversation_id_propagates_to_span_with_gen_ai_op( """Span with gen_ai.* op should get conversation_id.""" sentry_init( traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") From 44b2c2d952c996f5f5055ed1c028dd46f184b9ac Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 21 Apr 2026 09:58:23 +0200 Subject: [PATCH 42/84] update test with hardcoded version --- tests/integrations/huggingface_hub/test_huggingface_hub.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integrations/huggingface_hub/test_huggingface_hub.py b/tests/integrations/huggingface_hub/test_huggingface_hub.py index 031627906a..3c79ca7262 100644 --- a/tests/integrations/huggingface_hub/test_huggingface_hub.py +++ b/tests/integrations/huggingface_hub/test_huggingface_hub.py @@ -520,7 +520,7 @@ def test_text_generation( "sentry.origin": "auto.ai.huggingface_hub", "sentry.release": mock.ANY, "sentry.sdk.name": "sentry.python", - "sentry.sdk.version": "2.58.0", + "sentry.sdk.version": mock.ANY, "sentry.segment.id": mock.ANY, "sentry.segment.name": "test", "thread.id": mock.ANY, From 307db734e3e61eda241a427a0cc60d912865ab82 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Mon, 11 May 2026 14:46:40 +0200 Subject: [PATCH 43/84] merge fixes --- tests/integrations/openai/test_openai.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index 41bd7f2d51..17d80da1b5 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -1500,9 +1500,6 @@ def test_span_status_error(sentry_init, 
capture_items): spans = [item.payload for item in items if item.type == "span"] assert spans[0]["status"] == "error" - (transaction,) = (item.payload for item in items if item.type == "transaction") - assert transaction["contexts"]["trace"]["status"] == "internal_error" - @pytest.mark.asyncio async def test_bad_chat_completion_async(sentry_init, capture_items): From efc37e1864dd87ddacd27c7d4bf6fc83682cc60d Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Mon, 11 May 2026 14:59:36 +0200 Subject: [PATCH 44/84] adapt new test --- tests/integrations/openai/test_openai.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index 17d80da1b5..d5e78bad99 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -2852,13 +2852,13 @@ def test_ai_client_span_responses_api( ) @pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") def test_responses_api_conversation_id( - sentry_init, capture_events, conversation, expected_id + sentry_init, capture_items, conversation, expected_id ): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("span") client = OpenAI(api_key="z") client.responses._post = mock.Mock(return_value=EXAMPLE_RESPONSE) @@ -2870,13 +2870,12 @@ def test_responses_api_conversation_id( conversation=conversation, ) - (transaction,) = events - (span,) = transaction["spans"] + (span,) = (item.payload for item in items if item.type == "span") if expected_id is None: - assert "gen_ai.conversation.id" not in span["data"] + assert "gen_ai.conversation.id" not in span["attributes"] else: - assert span["data"]["gen_ai.conversation.id"] == expected_id + assert span["attributes"]["gen_ai.conversation.id"] == expected_id @pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") From 
bee63202465ec87a6bfff74b8a75366d041e9342 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 12 May 2026 14:05:54 +0200 Subject: [PATCH 45/84] add parameter --- sentry_sdk/tracing.py | 19 +- .../integrations/anthropic/test_anthropic.py | 4539 ++++++++++++----- .../google_genai/test_google_genai.py | 2062 ++++++-- .../huggingface_hub/test_huggingface_hub.py | 1330 +++-- .../integrations/langchain/test_langchain.py | 3869 ++++++++++---- .../integrations/langgraph/test_langgraph.py | 1503 ++++-- tests/integrations/litellm/test_litellm.py | 2367 ++++++--- tests/integrations/openai/test_openai.py | 3531 +++++++++---- .../openai_agents/test_openai_agents.py | 3545 +++++++++---- .../pydantic_ai/test_pydantic_ai.py | 2498 ++++++--- 10 files changed, 18393 insertions(+), 6870 deletions(-) diff --git a/sentry_sdk/tracing.py b/sentry_sdk/tracing.py index 9aab29996d..96029f1f58 100644 --- a/sentry_sdk/tracing.py +++ b/sentry_sdk/tracing.py @@ -1042,14 +1042,21 @@ def finish( finished_spans = [] has_gen_ai_span = False - for span in self._span_recorder.spans: - if span.timestamp is None: - continue + if client.options["_experiments"].get("stream_gen_ai_spans", False): + for span in self._span_recorder.spans: + if span.timestamp is None: + continue - if isinstance(span.op, str) and span.op.startswith("gen_ai."): - has_gen_ai_span = True + if isinstance(span.op, str) and span.op.startswith("gen_ai."): + has_gen_ai_span = True - finished_spans.append(span.to_json()) + finished_spans.append(span.to_json()) + else: + finished_spans = [ + span.to_json() + for span in self._span_recorder.spans + if span.timestamp is not None + ] len_diff = len(self._span_recorder.spans) - len(finished_spans) dropped_spans = len_diff + self._span_recorder.dropped_spans diff --git a/tests/integrations/anthropic/test_anthropic.py b/tests/integrations/anthropic/test_anthropic.py index 865013f0b4..1378f777df 100644 --- a/tests/integrations/anthropic/test_anthropic.py +++ 
b/tests/integrations/anthropic/test_anthropic.py @@ -81,6 +81,7 @@ async def __call__(self, *args, **kwargs): ) +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( "send_default_pii, include_prompts", [ @@ -91,14 +92,20 @@ async def __call__(self, *args, **kwargs): ], ) def test_nonstreaming_create_message( - sentry_init, capture_items, send_default_pii, include_prompts + sentry_init, + capture_events, + capture_items, + send_default_pii, + include_prompts, + stream_gen_ai_spans, ): sentry_init( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") + client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -109,51 +116,106 @@ def test_nonstreaming_create_message( } ] - with start_transaction(name="anthropic"): - response = client.messages.create( - max_tokens=1024, messages=messages, model="model" - ) + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - assert response == EXAMPLE_MESSAGE - usage = response.usage + with start_transaction(name="anthropic"): + response = client.messages.create( + max_tokens=1024, messages=messages, model="model" + ) - assert usage.input_tokens == 10 - assert usage.output_tokens == 20 + assert response == EXAMPLE_MESSAGE + usage = response.usage + + assert usage.input_tokens == 10 + assert usage.output_tokens == 20 - (event,) = (item.payload for item in items if item.type == "transaction") - assert event["transaction"] == "anthropic" + (event,) = (item.payload for item in items if item.type == "transaction") + assert event["transaction"] == "anthropic" - spans = [item.payload for item in items if item.type == "span"] - assert len(spans) == 1 - (span,) = spans + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + 
(span,) = spans - assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - assert span["name"] == "chat model" - assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" - if send_default_pii and include_prompts: + if send_default_pii and include_prompts: + assert ( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + == '[{"role": "user", "content": "Hello, Claude"}]' + ) + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi, I'm Claude." + ) + else: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] + + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False assert ( - span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - == '[{"role": "user", "content": "Hello, Claude"}]' + span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] + == "msg_01XFDUDYJgAACzvnptvVoYEL" ) - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi, I'm Claude." 
+ assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == [ + "end_turn" + ] else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False - assert ( - span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] - == "msg_01XFDUDYJgAACzvnptvVoYEL" - ) - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["end_turn"] + events = capture_events() + + with start_transaction(name="anthropic"): + response = client.messages.create( + max_tokens=1024, messages=messages, model="model" + ) + + assert response == EXAMPLE_MESSAGE + usage = response.usage + + assert usage.input_tokens == 10 + assert usage.output_tokens == 20 + + assert len(events) == 1 + (event,) = events + + assert event["type"] == "transaction" + assert event["transaction"] == "anthropic" + + assert len(event["spans"]) == 1 + (span,) = event["spans"] + + assert span["op"] == OP.GEN_AI_CHAT + assert span["description"] == "chat model" + assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + + if send_default_pii and include_prompts: + assert ( + span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + == '[{"role": "user", "content": "Hello, Claude"}]' + ) + assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi, I'm Claude." 
+ else: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False + assert ( + span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) + assert span["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["end_turn"] +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio @pytest.mark.parametrize( "send_default_pii, include_prompts", @@ -165,14 +227,20 @@ def test_nonstreaming_create_message( ], ) async def test_nonstreaming_create_message_async( - sentry_init, capture_items, send_default_pii, include_prompts + sentry_init, + capture_events, + capture_items, + send_default_pii, + include_prompts, + stream_gen_ai_spans, ): sentry_init( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") + client = AsyncAnthropic(api_key="z") client.messages._post = AsyncMock(return_value=EXAMPLE_MESSAGE) @@ -183,50 +251,102 @@ async def test_nonstreaming_create_message_async( } ] - with start_transaction(name="anthropic"): - response = await client.messages.create( - max_tokens=1024, messages=messages, model="model" - ) + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - assert response == EXAMPLE_MESSAGE - usage = response.usage + with start_transaction(name="anthropic"): + response = await client.messages.create( + max_tokens=1024, messages=messages, model="model" + ) - assert usage.input_tokens == 10 - assert usage.output_tokens == 20 + assert response == EXAMPLE_MESSAGE + usage = response.usage - (event,) 
= (item.payload for item in items if item.type == "transaction") - assert event["transaction"] == "anthropic" + assert usage.input_tokens == 10 + assert usage.output_tokens == 20 - spans = [item.payload for item in items if item.type == "span"] - assert len(spans) == 1 - (span,) = spans + (event,) = (item.payload for item in items if item.type == "transaction") + assert event["transaction"] == "anthropic" - assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - assert span["name"] == "chat model" - assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (span,) = spans - if send_default_pii and include_prompts: + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + + if send_default_pii and include_prompts: + assert ( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + == '[{"role": "user", "content": "Hello, Claude"}]' + ) + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi, I'm Claude." 
+ ) + else: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] + + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False assert ( - span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - == '[{"role": "user", "content": "Hello, Claude"}]' + span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] + == "msg_01XFDUDYJgAACzvnptvVoYEL" ) - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi, I'm Claude." else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False - assert ( - span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] - == "msg_01XFDUDYJgAACzvnptvVoYEL" - ) + events = capture_events() + + with start_transaction(name="anthropic"): + response = await client.messages.create( + max_tokens=1024, messages=messages, model="model" + ) + + assert response == EXAMPLE_MESSAGE + usage = response.usage + + assert usage.input_tokens == 10 + assert usage.output_tokens == 20 + + assert len(events) == 1 + (event,) = events + + assert event["type"] == "transaction" + assert event["transaction"] == "anthropic" + + assert len(event["spans"]) == 1 + (span,) = event["spans"] + + assert span["op"] == OP.GEN_AI_CHAT + assert span["description"] == "chat model" + assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert 
span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + + if send_default_pii and include_prompts: + assert ( + span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + == '[{"role": "user", "content": "Hello, Claude"}]' + ) + assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi, I'm Claude." + else: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False + assert ( + span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( "send_default_pii, include_prompts", [ @@ -238,11 +358,13 @@ async def test_nonstreaming_create_message_async( ) def test_streaming_create_message( sentry_init, + capture_events, capture_items, send_default_pii, include_prompts, get_model_response, server_side_event_chunks, + stream_gen_ai_spans, ): client = Anthropic(api_key="z") @@ -287,8 +409,8 @@ def test_streaming_create_message( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") messages = [ { @@ -297,12 +419,14 @@ def test_streaming_create_message( } ] - with mock.patch.object( - client._client, - "send", - return_value=response, - ) as _: - with start_transaction(name="anthropic"): + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"): message = client.messages.create( max_tokens=1024, messages=messages, model="model", stream=True 
) @@ -310,47 +434,102 @@ def test_streaming_create_message( for _ in message: pass - (event,) = (item.payload for item in items if item.type == "transaction") - assert event["transaction"] == "anthropic" + (event,) = (item.payload for item in items if item.type == "transaction") + assert event["transaction"] == "anthropic" - spans = [item.payload for item in items if item.type == "span"] - span = next( - span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - ) + spans = [item.payload for item in items if item.type == "span"] + span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) + + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + + if send_default_pii and include_prompts: + assert ( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + == '[{"role": "user", "content": "Hello, Claude"}]' + ) + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" 
+ ) - assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - assert span["name"] == "chat model" - assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + else: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - if send_default_pii and include_prompts: + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True assert ( - span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - == '[{"role": "user", "content": "Hello, Claude"}]' + span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] + == "msg_01XFDUDYJgAACzvnptvVoYEL" ) - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" 
- + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == [ + "max_tokens" + ] else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - assert ( - span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] - == "msg_01XFDUDYJgAACzvnptvVoYEL" - ) - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["max_tokens"] + events = capture_events() + + with mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"): + message = client.messages.create( + max_tokens=1024, messages=messages, model="model", stream=True + ) + for _ in message: + pass + + assert len(events) == 1 + (event,) = events + + assert event["type"] == "transaction" + assert event["transaction"] == "anthropic" + + span = next(span for span in event["spans"] if span["op"] == OP.GEN_AI_CHAT) + + assert span["op"] == OP.GEN_AI_CHAT + assert span["description"] == "chat model" + assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + + if send_default_pii and include_prompts: + assert ( + span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + == '[{"role": "user", "content": "Hello, Claude"}]' + ) + assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" 
+ + else: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 + assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 + assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert ( + span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) + assert span["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["max_tokens"] + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_streaming_create_message_close( sentry_init, + capture_events, capture_items, get_model_response, server_side_event_chunks, + stream_gen_ai_spans, ): client = Anthropic(api_key="z") @@ -395,8 +574,8 @@ def test_streaming_create_message_close( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") messages = [ { @@ -405,12 +584,14 @@ def test_streaming_create_message_close( } ] - with mock.patch.object( - client._client, - "send", - return_value=response, - ) as _: - with start_transaction(name="anthropic"): + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"): messages = client.messages.create( max_tokens=1024, messages=messages, model="model", stream=True ) @@ -420,45 +601,92 @@ def test_streaming_create_message_close( messages.close() - (event,) = (item.payload for item in items if item.type == "transaction") - assert event["transaction"] == "anthropic" + (event,) = (item.payload for item in items if item.type == "transaction") + assert event["transaction"] == "anthropic" - spans = [item.payload for item in items if item.type == 
"span"] - span = next( - span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - ) + spans = [item.payload for item in items if item.type == "span"] + span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) - assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - assert span["name"] == "chat model" - assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" - assert ( - span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - == '[{"role": "user", "content": "Hello, Claude"}]' - ) - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" - - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - assert ( - span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] - == "msg_01XFDUDYJgAACzvnptvVoYEL" - ) + assert ( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + == '[{"role": "user", "content": "Hello, Claude"}]' + ) + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" 
+ + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] + == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) + else: + events = capture_events() + with mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"): + messages = client.messages.create( + max_tokens=1024, messages=messages, model="model", stream=True + ) + for _ in range(4): + next(messages) + + messages.close() + + assert len(events) == 1 + (event,) = events + + assert event["type"] == "transaction" + assert event["transaction"] == "anthropic" + + span = next(span for span in event["spans"] if span["op"] == OP.GEN_AI_CHAT) + + assert span["op"] == OP.GEN_AI_CHAT + assert span["description"] == "chat model" + assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + + assert ( + span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + == '[{"role": "user", "content": "Hello, Claude"}]' + ) + assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" 
+ + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert ( + span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.skipif( ANTHROPIC_VERSION < (0, 41), reason="Error classes moved in https://github.com/anthropics/anthropic-sdk-python/commit/4e0b15e22fe40e9aa513459564f641bf97c90954.", ) def test_streaming_create_message_api_error( sentry_init, + capture_events, capture_items, get_model_response, server_side_event_chunks, + stream_gen_ai_spans, ): client = Anthropic(api_key="z") @@ -498,8 +726,8 @@ def test_streaming_create_message_api_error( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") messages = [ { @@ -508,52 +736,99 @@ def test_streaming_create_message_api_error( } ] - with pytest.raises(APIStatusError), mock.patch.object( - client._client, - "send", - return_value=response, - ) as _: - with start_transaction(name="anthropic"): + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with pytest.raises(APIStatusError), mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"): message = client.messages.create( max_tokens=1024, messages=messages, model="model", stream=True ) for _ in message: pass + (event,) = (item.payload for item in items if item.type == "transaction") + assert event["transaction"] == "anthropic" - (event,) = (item.payload for item in items if item.type == "transaction") - assert event["transaction"] == "anthropic" + spans = [item.payload for item in items if item.type == "span"] + 
span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) - spans = [item.payload for item in items if item.type == "span"] - span = next( - span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - ) + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" - assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - assert span["name"] == "chat model" - assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert ( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + == '[{"role": "user", "content": "Hello, Claude"}]' + ) + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" - assert ( - span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - == '[{"role": "user", "content": "Hello, Claude"}]' - ) - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" 
- - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - assert ( - span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] - == "msg_01XFDUDYJgAACzvnptvVoYEL" - ) + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] + == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) - assert span["status"] == "error" + assert span["status"] == "error" + else: + events = capture_events() + + with pytest.raises(APIStatusError), mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"): + message = client.messages.create( + max_tokens=1024, messages=messages, model="model", stream=True + ) + + for _ in message: + pass + + assert len(events) == 1 + (event,) = events + + assert event["type"] == "transaction" + assert event["transaction"] == "anthropic" + + span = next(span for span in event["spans"] if span["op"] == OP.GEN_AI_CHAT) + + assert span["op"] == OP.GEN_AI_CHAT + assert span["description"] == "chat model" + assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + + assert ( + span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + == '[{"role": "user", "content": "Hello, Claude"}]' + ) + assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" 
+ + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert ( + span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) + + assert span["status"] == "internal_error" + assert span["tags"]["status"] == "internal_error" assert event["contexts"]["trace"]["status"] == "internal_error" +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( "send_default_pii, include_prompts", [ @@ -565,11 +840,13 @@ def test_streaming_create_message_api_error( ) def test_stream_messages( sentry_init, + capture_events, capture_items, send_default_pii, include_prompts, get_model_response, server_side_event_chunks, + stream_gen_ai_spans, ): client = Anthropic(api_key="z") @@ -614,8 +891,8 @@ def test_stream_messages( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") messages = [ { @@ -624,61 +901,116 @@ def test_stream_messages( } ] - with mock.patch.object( - client._client, - "send", - return_value=response, - ) as _: - with start_transaction(name="anthropic"): - with client.messages.stream( - max_tokens=1024, - messages=messages, - model="model", - ) as stream: - for event in stream: - pass + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"), client.messages.stream( + max_tokens=1024, + messages=messages, + model="model", + ) as stream: + for event in stream: + pass - (event,) = (item.payload for item in items if item.type == "transaction") - assert event["transaction"] == "anthropic" + 
(event,) = (item.payload for item in items if item.type == "transaction") + assert event["transaction"] == "anthropic" - spans = [item.payload for item in items if item.type == "span"] - span = next( - span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - ) + spans = [item.payload for item in items if item.type == "span"] + span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) - assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - assert span["name"] == "chat model" - assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" - if send_default_pii and include_prompts: + if send_default_pii and include_prompts: + assert ( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + == '[{"role": "user", "content": "Hello, Claude"}]' + ) + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" 
+ ) + + else: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] + + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True assert ( - span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - == '[{"role": "user", "content": "Hello, Claude"}]' + span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] + == "msg_01XFDUDYJgAACzvnptvVoYEL" ) - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" - + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == [ + "max_tokens" + ] else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - assert ( - span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] - == "msg_01XFDUDYJgAACzvnptvVoYEL" - ) - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["max_tokens"] + events = capture_events() + + with mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"), client.messages.stream( + max_tokens=1024, + messages=messages, + model="model", + ) as stream: + for event in stream: + pass + + assert len(events) == 1 + (event,) = events + + assert event["type"] == "transaction" + assert event["transaction"] == "anthropic" + + span = next(span for span in event["spans"] if span["op"] == OP.GEN_AI_CHAT) + + assert span["op"] == OP.GEN_AI_CHAT + assert span["description"] == 
"chat model" + assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + + if send_default_pii and include_prompts: + assert ( + span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + == '[{"role": "user", "content": "Hello, Claude"}]' + ) + assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" + else: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 + assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 + assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert ( + span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) + assert span["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["max_tokens"] +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_stream_messages_close( sentry_init, + capture_events, capture_items, get_model_response, server_side_event_chunks, + stream_gen_ai_spans, ): client = Anthropic(api_key="z") @@ -723,8 +1055,8 @@ def test_stream_messages_close( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") messages = [ { @@ -733,65 +1065,117 @@ def test_stream_messages_close( } ] - with mock.patch.object( - client._client, - "send", - return_value=response, - ) as _: - with start_transaction(name="anthropic"): - with client.messages.stream( - max_tokens=1024, - messages=messages, - model="model", - ) as stream: - for _ in range(4): - next(stream) + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - # New versions add TextEvent, so consume one more event. 
- if TextEvent is not None and isinstance(next(stream), TextEvent): - next(stream) + with mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"), client.messages.stream( + max_tokens=1024, + messages=messages, + model="model", + ) as stream: + for _ in range(4): + next(stream) - stream.close() + # New versions add TextEvent, so consume one more event. + if TextEvent is not None and isinstance(next(stream), TextEvent): + next(stream) - (event,) = (item.payload for item in items if item.type == "transaction") - assert event["transaction"] == "anthropic" + stream.close() - spans = [item.payload for item in items if item.type == "span"] - span = next( - span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - ) + (event,) = (item.payload for item in items if item.type == "transaction") + assert event["transaction"] == "anthropic" + + spans = [item.payload for item in items if item.type == "span"] + span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) - assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - assert span["name"] == "chat model" - assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" - assert ( - span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - == '[{"role": "user", "content": "Hello, Claude"}]' - ) - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" 
- - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - assert ( - span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] - == "msg_01XFDUDYJgAACzvnptvVoYEL" - ) + assert ( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + == '[{"role": "user", "content": "Hello, Claude"}]' + ) + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" + + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] + == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) + else: + events = capture_events() + + with mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"), client.messages.stream( + max_tokens=1024, + messages=messages, + model="model", + ) as stream: + for _ in range(4): + next(stream) + + # New versions add TextEvent, so consume one more event. 
+ if TextEvent is not None and isinstance(next(stream), TextEvent): + next(stream) + stream.close() + assert len(events) == 1 + (event,) = events + + assert event["type"] == "transaction" + assert event["transaction"] == "anthropic" + + span = next(span for span in event["spans"] if span["op"] == OP.GEN_AI_CHAT) + + assert span["op"] == OP.GEN_AI_CHAT + assert span["description"] == "chat model" + assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + + assert ( + span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + == '[{"role": "user", "content": "Hello, Claude"}]' + ) + assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" + + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert ( + span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.skipif( ANTHROPIC_VERSION < (0, 41), reason="Error classes moved in https://github.com/anthropics/anthropic-sdk-python/commit/4e0b15e22fe40e9aa513459564f641bf97c90954.", ) def test_stream_messages_api_error( sentry_init, + capture_events, capture_items, get_model_response, server_side_event_chunks, + stream_gen_ai_spans, ): client = Anthropic(api_key="z") @@ -831,8 +1215,8 @@ def test_stream_messages_api_error( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") messages = [ { @@ -841,53 +1225,100 @@ def test_stream_messages_api_error( } ] - with pytest.raises(APIStatusError), mock.patch.object( - client._client, - "send", 
- return_value=response, - ) as _: - with start_transaction(name="anthropic"): - with client.messages.stream( - max_tokens=1024, - messages=messages, - model="model", - ) as stream: - for event in stream: - pass + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with pytest.raises(APIStatusError), mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"), client.messages.stream( + max_tokens=1024, + messages=messages, + model="model", + ) as stream: + for event in stream: + pass - (event,) = (item.payload for item in items if item.type == "transaction") - assert event["transaction"] == "anthropic" + (event,) = (item.payload for item in items if item.type == "transaction") + assert event["transaction"] == "anthropic" - spans = [item.payload for item in items if item.type == "span"] - span = next( - span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - ) + spans = [item.payload for item in items if item.type == "span"] + span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) - assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - assert span["name"] == "chat model" - assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" - assert ( - span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - == '[{"role": "user", "content": "Hello, Claude"}]' - ) - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" 
- - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - assert ( - span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] - == "msg_01XFDUDYJgAACzvnptvVoYEL" - ) + assert ( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + == '[{"role": "user", "content": "Hello, Claude"}]' + ) + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" + + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] + == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) + + assert span["status"] == "error" + else: + events = capture_events() + + with pytest.raises(APIStatusError), mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"), client.messages.stream( + max_tokens=1024, + messages=messages, + model="model", + ) as stream: + for event in stream: + pass + + assert len(events) == 1 + (event,) = events - assert span["status"] == "error" + assert event["type"] == "transaction" + assert event["transaction"] == "anthropic" + + span = next(span for span in event["spans"] if span["op"] == OP.GEN_AI_CHAT) + + assert span["op"] == OP.GEN_AI_CHAT + assert span["description"] == "chat model" + assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + + assert ( + span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + == '[{"role": "user", "content": "Hello, Claude"}]' + ) + assert 
span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" + + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert ( + span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) + + assert span["status"] == "internal_error" + assert span["tags"]["status"] == "internal_error" assert event["contexts"]["trace"]["status"] == "internal_error" +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio @pytest.mark.parametrize( "send_default_pii, include_prompts", @@ -900,12 +1331,14 @@ def test_stream_messages_api_error( ) async def test_streaming_create_message_async( sentry_init, + capture_events, capture_items, send_default_pii, include_prompts, get_model_response, async_iterator, server_side_event_chunks, + stream_gen_ai_spans, ): client = AsyncAnthropic(api_key="z") @@ -953,8 +1386,8 @@ async def test_streaming_create_message_async( traces_sample_rate=1.0, default_integrations=False, send_default_pii=send_default_pii, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") messages = [ { @@ -963,12 +1396,14 @@ async def test_streaming_create_message_async( } ] - with mock.patch.object( - client._client, - "send", - return_value=response, - ) as _: - with start_transaction(name="anthropic"): + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"): message = await client.messages.create( max_tokens=1024, messages=messages, model="model", stream=True ) @@ -976,48 +1411,104 @@ async def test_streaming_create_message_async( async for _ in message: pass - (event,) = (item.payload for item in items if item.type == "transaction") 
- assert event["transaction"] == "anthropic" + (event,) = (item.payload for item in items if item.type == "transaction") + assert event["transaction"] == "anthropic" - spans = [item.payload for item in items if item.type == "span"] - assert len(spans) == 1 - (span,) = spans + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (span,) = spans - assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - assert span["name"] == "chat model" - assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" - if send_default_pii and include_prompts: + if send_default_pii and include_prompts: + assert ( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + == '[{"role": "user", "content": "Hello, Claude"}]' + ) + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" + ) + + else: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] + + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True assert ( - span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - == '[{"role": "user", "content": "Hello, Claude"}]' + span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] + == "msg_01XFDUDYJgAACzvnptvVoYEL" ) - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! 
I'm Claude!" - + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == [ + "max_tokens" + ] else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - assert ( - span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] - == "msg_01XFDUDYJgAACzvnptvVoYEL" - ) - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["max_tokens"] + events = capture_events() + + with mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"): + message = await client.messages.create( + max_tokens=1024, messages=messages, model="model", stream=True + ) + + async for _ in message: + pass + + assert len(events) == 1 + (event,) = events + + assert event["type"] == "transaction" + assert event["transaction"] == "anthropic" + assert len(event["spans"]) == 1 + (span,) = event["spans"] + assert span["op"] == OP.GEN_AI_CHAT + assert span["description"] == "chat model" + assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + + if send_default_pii and include_prompts: + assert ( + span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + == '[{"role": "user", "content": "Hello, Claude"}]' + ) + assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" 
+ + else: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 + assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 + assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert ( + span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) + assert span["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["max_tokens"] + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_streaming_create_message_async_close( sentry_init, + capture_events, capture_items, get_model_response, async_iterator, server_side_event_chunks, + stream_gen_ai_spans, ): client = AsyncAnthropic(api_key="z") @@ -1064,8 +1555,8 @@ async def test_streaming_create_message_async_close( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") messages = [ { @@ -1074,12 +1565,58 @@ async def test_streaming_create_message_async_close( } ] - with mock.patch.object( - client._client, - "send", - return_value=response, - ) as _: - with start_transaction(name="anthropic"): + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"): + messages = await client.messages.create( + max_tokens=1024, messages=messages, model="model", stream=True + ) + + for _ in range(4): + await messages.__anext__() + await messages.close() + + (event,) = (item.payload for item in items if item.type == "transaction") + assert event["transaction"] == "anthropic" + + spans = [item.payload for item in items if item.type == "span"] + span = next( + 
span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) + + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + + assert ( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + == '[{"role": "user", "content": "Hello, Claude"}]' + ) + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" + + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] + == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) + else: + events = capture_events() + + with mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"): messages = await client.messages.create( max_tokens=1024, messages=messages, model="model", stream=True ) @@ -1088,36 +1625,36 @@ async def test_streaming_create_message_async_close( await messages.__anext__() await messages.close() - (event,) = (item.payload for item in items if item.type == "transaction") - assert event["transaction"] == "anthropic" + assert len(events) == 1 + (event,) = events - spans = [item.payload for item in items if item.type == "span"] - span = next( - span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - ) + assert event["type"] == "transaction" + assert event["transaction"] == "anthropic" - assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - assert span["name"] == "chat model" - assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert 
span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + span = next(span for span in event["spans"] if span["op"] == OP.GEN_AI_CHAT) - assert ( - span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - == '[{"role": "user", "content": "Hello, Claude"}]' - ) - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" - - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - assert ( - span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] - == "msg_01XFDUDYJgAACzvnptvVoYEL" - ) + assert span["op"] == OP.GEN_AI_CHAT + assert span["description"] == "chat model" + assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + + assert ( + span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + == '[{"role": "user", "content": "Hello, Claude"}]' + ) + assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" 
+ + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert ( + span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.skipif( ANTHROPIC_VERSION < (0, 41), reason="Error classes moved in https://github.com/anthropics/anthropic-sdk-python/commit/4e0b15e22fe40e9aa513459564f641bf97c90954.", @@ -1125,10 +1662,12 @@ async def test_streaming_create_message_async_close( @pytest.mark.asyncio async def test_streaming_create_message_async_api_error( sentry_init, + capture_events, capture_items, get_model_response, async_iterator, server_side_event_chunks, + stream_gen_ai_spans, ): client = AsyncAnthropic(api_key="z") @@ -1170,8 +1709,8 @@ async def test_streaming_create_message_async_api_error( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") messages = [ { @@ -1180,12 +1719,14 @@ async def test_streaming_create_message_async_api_error( } ] - with pytest.raises(APIStatusError), mock.patch.object( - client._client, - "send", - return_value=response, - ) as _: - with start_transaction(name="anthropic"): + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with pytest.raises(APIStatusError), mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"): message = await client.messages.create( max_tokens=1024, messages=messages, model="model", stream=True ) @@ -1193,39 +1734,85 @@ async def test_streaming_create_message_async_api_error( async for _ in message: pass - (event,) = (item.payload for item in items if item.type == 
"transaction") - assert event["transaction"] == "anthropic" + (event,) = (item.payload for item in items if item.type == "transaction") + assert event["transaction"] == "anthropic" - spans = [item.payload for item in items if item.type == "span"] - span = next( - span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - ) + spans = [item.payload for item in items if item.type == "span"] + span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) - assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - assert span["name"] == "chat model" - assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" - assert ( - span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - == '[{"role": "user", "content": "Hello, Claude"}]' - ) - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" - - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - assert ( - span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] - == "msg_01XFDUDYJgAACzvnptvVoYEL" - ) + assert ( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + == '[{"role": "user", "content": "Hello, Claude"}]' + ) + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" 
+ + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] + == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) + + assert span["status"] == "error" + else: + events = capture_events() + + with pytest.raises(APIStatusError), mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"): + message = await client.messages.create( + max_tokens=1024, messages=messages, model="model", stream=True + ) + + async for _ in message: + pass + + assert len(events) == 1 + (event,) = events + + assert event["type"] == "transaction" + assert event["transaction"] == "anthropic" + + span = next(span for span in event["spans"] if span["op"] == OP.GEN_AI_CHAT) + + assert span["op"] == OP.GEN_AI_CHAT + assert span["description"] == "chat model" + assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + + assert ( + span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + == '[{"role": "user", "content": "Hello, Claude"}]' + ) + assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" 
+ + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert ( + span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) - assert span["status"] == "error" + assert span["status"] == "internal_error" + assert span["tags"]["status"] == "internal_error" assert event["contexts"]["trace"]["status"] == "internal_error" +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio @pytest.mark.parametrize( "send_default_pii, include_prompts", @@ -1238,12 +1825,14 @@ async def test_streaming_create_message_async_api_error( ) async def test_stream_message_async( sentry_init, + capture_events, capture_items, send_default_pii, include_prompts, get_model_response, async_iterator, server_side_event_chunks, + stream_gen_ai_spans, ): client = AsyncAnthropic(api_key="z") @@ -1290,8 +1879,8 @@ async def test_stream_message_async( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") messages = [ { @@ -1300,12 +1889,14 @@ async def test_stream_message_async( } ] - with mock.patch.object( - client._client, - "send", - return_value=response, - ) as _: - with start_transaction(name="anthropic"): + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"): async with client.messages.stream( max_tokens=1024, messages=messages, @@ -1314,40 +1905,91 @@ async def test_stream_message_async( async for event in stream: pass - (event,) = (item.payload for item in items if item.type == "transaction") - assert 
event["transaction"] == "anthropic" + (event,) = (item.payload for item in items if item.type == "transaction") + assert event["transaction"] == "anthropic" + + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (span,) = spans + + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" - spans = [item.payload for item in items if item.type == "span"] - assert len(spans) == 1 - (span,) = spans + if send_default_pii and include_prompts: + assert ( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + == '[{"role": "user", "content": "Hello, Claude"}]' + ) + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" + ) - assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - assert span["name"] == "chat model" - assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + else: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - if send_default_pii and include_prompts: + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True assert ( - span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - == '[{"role": "user", "content": "Hello, Claude"}]' + span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] + == "msg_01XFDUDYJgAACzvnptvVoYEL" ) - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" 
- else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - assert ( - span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] - == "msg_01XFDUDYJgAACzvnptvVoYEL" - ) + events = capture_events() + + with mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"): + async with client.messages.stream( + max_tokens=1024, + messages=messages, + model="model", + ) as stream: + async for event in stream: + pass + assert len(events) == 1 + (event,) = events + + assert event["type"] == "transaction" + assert event["transaction"] == "anthropic" + + assert len(event["spans"]) == 1 + (span,) = event["spans"] + + assert span["op"] == OP.GEN_AI_CHAT + assert span["description"] == "chat model" + assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + + if send_default_pii and include_prompts: + assert ( + span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + == '[{"role": "user", "content": "Hello, Claude"}]' + ) + assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" 
+ + else: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 + assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 + assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert ( + span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.skipif( ANTHROPIC_VERSION < (0, 41), reason="Error classes moved in https://github.com/anthropics/anthropic-sdk-python/commit/4e0b15e22fe40e9aa513459564f641bf97c90954.", @@ -1355,10 +1997,12 @@ async def test_stream_message_async( @pytest.mark.asyncio async def test_stream_messages_async_api_error( sentry_init, + capture_events, capture_items, get_model_response, async_iterator, server_side_event_chunks, + stream_gen_ai_spans, ): client = AsyncAnthropic(api_key="z") @@ -1400,8 +2044,8 @@ async def test_stream_messages_async_api_error( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") messages = [ { @@ -1410,12 +2054,14 @@ async def test_stream_messages_async_api_error( } ] - with pytest.raises(APIStatusError), mock.patch.object( - client._client, - "send", - return_value=response, - ) as _: - with start_transaction(name="anthropic"): + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with pytest.raises(APIStatusError), mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"): async with client.messages.stream( max_tokens=1024, messages=messages, @@ -1424,46 +2070,95 @@ async def test_stream_messages_async_api_error( async for event in stream: pass - (event,) = 
(item.payload for item in items if item.type == "transaction") - assert event["transaction"] == "anthropic" + (event,) = (item.payload for item in items if item.type == "transaction") + assert event["transaction"] == "anthropic" - spans = [item.payload for item in items if item.type == "span"] - span = next( - span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - ) + spans = [item.payload for item in items if item.type == "span"] + span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) - assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - assert span["name"] == "chat model" - assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" - assert ( - span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - == '[{"role": "user", "content": "Hello, Claude"}]' - ) - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" - - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - assert ( - span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] - == "msg_01XFDUDYJgAACzvnptvVoYEL" - ) + assert ( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + == '[{"role": "user", "content": "Hello, Claude"}]' + ) + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" 
+ + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] + == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) - assert span["status"] == "error" + assert span["status"] == "error" + else: + events = capture_events() + + with pytest.raises(APIStatusError), mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"): + async with client.messages.stream( + max_tokens=1024, + messages=messages, + model="model", + ) as stream: + async for event in stream: + pass + + assert len(events) == 1 + (event,) = events + + assert event["type"] == "transaction" + assert event["transaction"] == "anthropic" + + span = next(span for span in event["spans"] if span["op"] == OP.GEN_AI_CHAT) + + assert span["op"] == OP.GEN_AI_CHAT + assert span["description"] == "chat model" + assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + + assert ( + span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + == '[{"role": "user", "content": "Hello, Claude"}]' + ) + assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" 
+ + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert ( + span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) + + assert span["status"] == "internal_error" + assert span["tags"]["status"] == "internal_error" assert event["contexts"]["trace"]["status"] == "internal_error" +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_stream_messages_async_close( sentry_init, + capture_events, capture_items, get_model_response, async_iterator, server_side_event_chunks, + stream_gen_ai_spans, ): client = AsyncAnthropic(api_key="z") @@ -1510,8 +2205,8 @@ async def test_stream_messages_async_close( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") messages = [ { @@ -1520,12 +2215,14 @@ async def test_stream_messages_async_close( } ] - with mock.patch.object( - client._client, - "send", - return_value=response, - ) as _: - with start_transaction(name="anthropic"): + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"): async with client.messages.stream( max_tokens=1024, messages=messages, @@ -1542,36 +2239,88 @@ async def test_stream_messages_async_close( await stream.close() - (event,) = (item.payload for item in items if item.type == "transaction") - assert event["transaction"] == "anthropic" + (event,) = (item.payload for item in items if item.type == "transaction") + assert event["transaction"] == "anthropic" - spans = [item.payload for item in items if item.type == "span"] - span = 
next( - span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - ) + spans = [item.payload for item in items if item.type == "span"] + span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) - assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - assert span["name"] == "chat model" - assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" - assert ( - span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - == '[{"role": "user", "content": "Hello, Claude"}]' - ) - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" - - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - assert ( - span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] - == "msg_01XFDUDYJgAACzvnptvVoYEL" - ) + assert ( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + == '[{"role": "user", "content": "Hello, Claude"}]' + ) + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" 
+ + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] + == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) + else: + events = capture_events() + + with mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"): + async with client.messages.stream( + max_tokens=1024, + messages=messages, + model="model", + ) as stream: + for _ in range(4): + await stream.__anext__() + + # New versions add TextEvent, so consume one more event. + if TextEvent is not None and isinstance( + await stream.__anext__(), TextEvent + ): + await stream.__anext__() + + await stream.close() + + assert len(events) == 1 + (event,) = events + + assert event["type"] == "transaction" + assert event["transaction"] == "anthropic" + + span = next(span for span in event["spans"] if span["op"] == OP.GEN_AI_CHAT) + + assert span["op"] == OP.GEN_AI_CHAT + assert span["description"] == "chat model" + assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + + assert ( + span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + == '[{"role": "user", "content": "Hello, Claude"}]' + ) + assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" 
+ + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert ( + span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.skipif( ANTHROPIC_VERSION < (0, 27), reason="Versions <0.27.0 do not include InputJSONDelta, which was introduced in >=0.27.0 along with a new message delta type for tool calling.", @@ -1587,11 +2336,13 @@ async def test_stream_messages_async_close( ) def test_streaming_create_message_with_input_json_delta( sentry_init, + capture_events, capture_items, send_default_pii, include_prompts, get_model_response, server_side_event_chunks, + stream_gen_ai_spans, ): client = Anthropic(api_key="z") @@ -1666,8 +2417,8 @@ def test_streaming_create_message_with_input_json_delta( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") messages = [ { @@ -1676,12 +2427,14 @@ def test_streaming_create_message_with_input_json_delta( } ] - with mock.patch.object( - client._client, - "send", - return_value=response, - ) as _: - with start_transaction(name="anthropic"): + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"): message = client.messages.create( max_tokens=1024, messages=messages, model="model", stream=True ) @@ -1689,38 +2442,87 @@ def test_streaming_create_message_with_input_json_delta( for _ in message: pass - (event,) = (item.payload for item in items if item.type == "transaction") - assert event["transaction"] == 
"anthropic" + (event,) = (item.payload for item in items if item.type == "transaction") + assert event["transaction"] == "anthropic" - spans = [item.payload for item in items if item.type == "span"] - assert len(spans) == 1 - (span,) = spans + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (span,) = spans - assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - assert span["name"] == "chat model" - assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + + if send_default_pii and include_prompts: + assert ( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + == '[{"role": "user", "content": "What is the weather like in San Francisco?"}]' + ) + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] + == '{"location": "San Francisco, CA"}' + ) + else: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] + + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 366 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 41 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 407 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - if send_default_pii and include_prompts: - assert ( - span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - == '[{"role": "user", "content": "What is the weather like in San Francisco?"}]' - ) - assert ( - span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] - == '{"location": "San Francisco, CA"}' - ) 
else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] + events = capture_events() + + with mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"): + message = client.messages.create( + max_tokens=1024, messages=messages, model="model", stream=True + ) + + for _ in message: + pass + + assert len(events) == 1 + (event,) = events + + assert event["type"] == "transaction" + assert event["transaction"] == "anthropic" + + assert len(event["spans"]) == 1 + (span,) = event["spans"] + + assert span["op"] == OP.GEN_AI_CHAT + assert span["description"] == "chat model" + assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + + if send_default_pii and include_prompts: + assert ( + span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + == '[{"role": "user", "content": "What is the weather like in San Francisco?"}]' + ) + assert ( + span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + == '{"location": "San Francisco, CA"}' + ) + else: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 366 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 41 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 407 - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 366 + assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 41 + assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 407 + assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.skipif( ANTHROPIC_VERSION < (0, 27), 
reason="Versions <0.27.0 do not include InputJSONDelta, which was introduced in >=0.27.0 along with a new message delta type for tool calling.", @@ -1736,11 +2538,13 @@ def test_streaming_create_message_with_input_json_delta( ) def test_stream_messages_with_input_json_delta( sentry_init, + capture_events, capture_items, send_default_pii, include_prompts, get_model_response, server_side_event_chunks, + stream_gen_ai_spans, ): client = Anthropic(api_key="z") @@ -1815,8 +2619,8 @@ def test_stream_messages_with_input_json_delta( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") messages = [ { @@ -1825,52 +2629,101 @@ def test_stream_messages_with_input_json_delta( } ] - with mock.patch.object( - client._client, - "send", - return_value=response, - ) as _: - with start_transaction(name="anthropic"): - with client.messages.stream( - max_tokens=1024, - messages=messages, - model="model", - ) as stream: - for event in stream: - pass + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - (event,) = (item.payload for item in items if item.type == "transaction") - assert event["transaction"] == "anthropic" + with mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"), client.messages.stream( + max_tokens=1024, + messages=messages, + model="model", + ) as stream: + for event in stream: + pass - spans = [item.payload for item in items if item.type == "span"] - assert len(spans) == 1 - (span,) = spans + (event,) = (item.payload for item in items if item.type == "transaction") + assert event["transaction"] == "anthropic" - assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - assert span["name"] == "chat model" - assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert 
span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (span,) = spans - if send_default_pii and include_prompts: - assert ( - span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - == '[{"role": "user", "content": "What is the weather like in San Francisco?"}]' - ) - assert ( - span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] - == '{"location": "San Francisco, CA"}' - ) + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + + if send_default_pii and include_prompts: + assert ( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + == '[{"role": "user", "content": "What is the weather like in San Francisco?"}]' + ) + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] + == '{"location": "San Francisco, CA"}' + ) + else: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] + + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 366 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 41 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 407 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] + events = capture_events() + + with mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"), client.messages.stream( + max_tokens=1024, + messages=messages, + model="model", + ) as stream: + for event in stream: + 
pass + + assert len(events) == 1 + (event,) = events + + assert event["type"] == "transaction" + assert event["transaction"] == "anthropic" + + assert len(event["spans"]) == 1 + (span,) = event["spans"] + + assert span["op"] == OP.GEN_AI_CHAT + assert span["description"] == "chat model" + assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 366 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 41 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 407 - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + if send_default_pii and include_prompts: + assert ( + span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + == '[{"role": "user", "content": "What is the weather like in San Francisco?"}]' + ) + assert ( + span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + == '{"location": "San Francisco, CA"}' + ) + else: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 366 + assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 41 + assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 407 + assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio @pytest.mark.skipif( ANTHROPIC_VERSION < (0, 27), @@ -1887,12 +2740,14 @@ def test_stream_messages_with_input_json_delta( ) async def test_streaming_create_message_with_input_json_delta_async( sentry_init, + capture_events, capture_items, send_default_pii, include_prompts, get_model_response, async_iterator, server_side_event_chunks, + stream_gen_ai_spans, ): client = AsyncAnthropic(api_key="z") response = get_model_response( @@ -1972,8 +2827,8 @@ async def 
test_streaming_create_message_with_input_json_delta_async( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") messages = [ { @@ -1982,12 +2837,14 @@ async def test_streaming_create_message_with_input_json_delta_async( } ] - with mock.patch.object( - client._client, - "send", - return_value=response, - ) as _: - with start_transaction(name="anthropic"): + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"): message = await client.messages.create( max_tokens=1024, messages=messages, model="model", stream=True ) @@ -1995,39 +2852,88 @@ async def test_streaming_create_message_with_input_json_delta_async( async for _ in message: pass - (event,) = (item.payload for item in items if item.type == "transaction") - assert event["transaction"] == "anthropic" + (event,) = (item.payload for item in items if item.type == "transaction") + assert event["transaction"] == "anthropic" - spans = [item.payload for item in items if item.type == "span"] - assert len(spans) == 1 - (span,) = spans + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (span,) = spans - assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - assert span["name"] == "chat model" - assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert 
span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" - if send_default_pii and include_prompts: - assert ( - span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - == '[{"role": "user", "content": "What is the weather like in San Francisco?"}]' - ) - assert ( - span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] - == '{"location": "San Francisco, CA"}' - ) + if send_default_pii and include_prompts: + assert ( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + == '[{"role": "user", "content": "What is the weather like in San Francisco?"}]' + ) + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] + == '{"location": "San Francisco, CA"}' + ) + else: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] + + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 366 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 41 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 407 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] + events = capture_events() + + with mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"): + message = await client.messages.create( + max_tokens=1024, messages=messages, model="model", stream=True + ) + + async for _ in message: + pass + + assert len(events) == 1 + (event,) = events + + assert event["type"] == "transaction" + assert event["transaction"] == "anthropic" + + assert len(event["spans"]) == 1 + (span,) = event["spans"] + + assert span["op"] == OP.GEN_AI_CHAT + assert span["description"] == "chat model" + assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] 
== "model" + + if send_default_pii and include_prompts: + assert ( + span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + == '[{"role": "user", "content": "What is the weather like in San Francisco?"}]' + ) + assert ( + span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + == '{"location": "San Francisco, CA"}' + ) - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 366 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 41 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 407 - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + else: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 366 + assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 41 + assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 407 + assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio @pytest.mark.skipif( ANTHROPIC_VERSION < (0, 27), @@ -2044,12 +2950,14 @@ async def test_streaming_create_message_with_input_json_delta_async( ) async def test_stream_message_with_input_json_delta_async( sentry_init, + capture_events, capture_items, send_default_pii, include_prompts, get_model_response, async_iterator, server_side_event_chunks, + stream_gen_ai_spans, ): client = AsyncAnthropic(api_key="z") response = get_model_response( @@ -2129,8 +3037,8 @@ async def test_stream_message_with_input_json_delta_async( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") messages = [ { @@ -2139,12 +3047,14 @@ async def test_stream_message_with_input_json_delta_async( } ] - with mock.patch.object( - client._client, - "send", - 
return_value=response, - ) as _: - with start_transaction(name="anthropic"): + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"): async with client.messages.stream( max_tokens=1024, messages=messages, @@ -2153,76 +3063,132 @@ async def test_stream_message_with_input_json_delta_async( async for event in stream: pass - (event,) = (item.payload for item in items if item.type == "transaction") - assert event["transaction"] == "anthropic" - - spans = [item.payload for item in items if item.type == "span"] - assert len(spans) == 1 - (span,) = spans + (event,) = (item.payload for item in items if item.type == "transaction") + assert event["transaction"] == "anthropic" - assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - assert span["name"] == "chat model" - assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (span,) = spans - if send_default_pii and include_prompts: - assert ( - span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - == '[{"role": "user", "content": "What is the weather like in San Francisco?"}]' - ) - assert ( - span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] - == '{"location": "San Francisco, CA"}' - ) + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + if send_default_pii and include_prompts: + assert ( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + == '[{"role": "user", "content": "What is the 
weather like in San Francisco?"}]' + ) + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] + == '{"location": "San Francisco, CA"}' + ) + else: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] + + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 366 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 41 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 407 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 366 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 41 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 407 - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + events = capture_events() + with mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"): + async with client.messages.stream( + max_tokens=1024, + messages=messages, + model="model", + ) as stream: + async for event in stream: + pass -def test_exception_message_create(sentry_init, capture_items): + assert len(events) == 1 + (event,) = events + + assert event["type"] == "transaction" + assert event["transaction"] == "anthropic" + + assert len(event["spans"]) == 1 + (span,) = event["spans"] + + assert span["op"] == OP.GEN_AI_CHAT + assert span["description"] == "chat model" + assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + + if send_default_pii and include_prompts: + if stream_gen_ai_spans: + assert ( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + == '[{"role": "user", 
"content": "What is the weather like in San Francisco?"}]' + ) + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] + == '{"location": "San Francisco, CA"}' + ) + else: + assert ( + span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + == '[{"role": "user", "content": "What is the weather like in San Francisco?"}]' + ) + assert ( + span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + == '{"location": "San Francisco, CA"}' + ) + + else: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 366 + assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 41 + assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 407 + assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_exception_message_create( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("event", "transaction") client = Anthropic(api_key="z") client.messages._post = mock.Mock( side_effect=AnthropicError("API rate limit reached") ) - with pytest.raises(AnthropicError): - client.messages.create( - model="some-model", - messages=[{"role": "system", "content": "I'm throwing an exception"}], - max_tokens=1024, - ) - (event,) = (item.payload for item in items if item.type == "event") - assert event["level"] == "error" + if stream_gen_ai_spans: + items = capture_items("event", "transaction") - (transaction,) = (item.payload for item in items if item.type == "transaction") - assert transaction["contexts"]["trace"]["status"] == "internal_error" + with pytest.raises(AnthropicError): + client.messages.create( + model="some-model", + messages=[{"role": "system", "content": "I'm throwing an exception"}], + 
max_tokens=1024, + ) + (event,) = (item.payload for item in items if item.type == "event") + assert event["level"] == "error" -def test_span_status_error(sentry_init, capture_items): - sentry_init( - integrations=[AnthropicIntegration()], - traces_sample_rate=1.0, - ) - items = capture_items("event", "span") + (transaction,) = (item.payload for item in items if item.type == "transaction") + else: + events = capture_events() - with start_transaction(name="anthropic"): - client = Anthropic(api_key="z") - client.messages._post = mock.Mock( - side_effect=AnthropicError("API rate limit reached") - ) with pytest.raises(AnthropicError): client.messages.create( model="some-model", @@ -2230,76 +3196,195 @@ def test_span_status_error(sentry_init, capture_items): max_tokens=1024, ) - (error,) = (item.payload for item in items if item.type == "event") - assert error["level"] == "error" + (event, transaction) = events + assert event["level"] == "error" - spans = [item.payload for item in items if item.type == "span"] - assert spans[0]["status"] == "error" - assert spans[0]["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert spans[0]["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert transaction["contexts"]["trace"]["status"] == "internal_error" -@pytest.mark.asyncio -async def test_span_status_error_async(sentry_init, capture_items): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_span_status_error( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("event", "span") + if stream_gen_ai_spans: + items = capture_items("event", "span") - with start_transaction(name="anthropic"): - client = AsyncAnthropic(api_key="z") - client.messages._post = AsyncMock( - side_effect=AnthropicError("API rate limit reached") - ) - with 
pytest.raises(AnthropicError): - await client.messages.create( - model="some-model", - messages=[{"role": "system", "content": "I'm throwing an exception"}], - max_tokens=1024, + with start_transaction(name="anthropic"): + client = Anthropic(api_key="z") + client.messages._post = mock.Mock( + side_effect=AnthropicError("API rate limit reached") ) + with pytest.raises(AnthropicError): + client.messages.create( + model="some-model", + messages=[ + {"role": "system", "content": "I'm throwing an exception"} + ], + max_tokens=1024, + ) + + (error,) = (item.payload for item in items if item.type == "event") + assert error["level"] == "error" + + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["status"] == "error" + assert spans[0]["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert spans[0]["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + else: + events = capture_events() - (error,) = (item.payload for item in items if item.type == "event") - assert error["level"] == "error" - - spans = [item.payload for item in items if item.type == "span"] - assert spans[0]["status"] == "error" - assert spans[0]["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert spans[0]["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + with start_transaction(name="anthropic"): + client = Anthropic(api_key="z") + client.messages._post = mock.Mock( + side_effect=AnthropicError("API rate limit reached") + ) + with pytest.raises(AnthropicError): + client.messages.create( + model="some-model", + messages=[ + {"role": "system", "content": "I'm throwing an exception"} + ], + max_tokens=1024, + ) + + (error, transaction) = events + assert error["level"] == "error" + assert transaction["spans"][0]["status"] == "internal_error" + assert transaction["spans"][0]["tags"]["status"] == "internal_error" + assert transaction["spans"][0]["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert 
transaction["spans"][0]["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +@pytest.mark.asyncio +async def test_span_status_error_async( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): + sentry_init( + integrations=[AnthropicIntegration()], + traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) + if stream_gen_ai_spans: + items = capture_items("event", "span") + with start_transaction(name="anthropic"): + client = AsyncAnthropic(api_key="z") + client.messages._post = AsyncMock( + side_effect=AnthropicError("API rate limit reached") + ) + with pytest.raises(AnthropicError): + await client.messages.create( + model="some-model", + messages=[ + {"role": "system", "content": "I'm throwing an exception"} + ], + max_tokens=1024, + ) + + (error,) = (item.payload for item in items if item.type == "event") + assert error["level"] == "error" + + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["status"] == "error" + assert spans[0]["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert spans[0]["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + else: + events = capture_events() + with start_transaction(name="anthropic"): + client = AsyncAnthropic(api_key="z") + client.messages._post = AsyncMock( + side_effect=AnthropicError("API rate limit reached") + ) + with pytest.raises(AnthropicError): + await client.messages.create( + model="some-model", + messages=[ + {"role": "system", "content": "I'm throwing an exception"} + ], + max_tokens=1024, + ) + + (error, transaction) = events + assert error["level"] == "error" + assert transaction["spans"][0]["status"] == "internal_error" + assert transaction["spans"][0]["tags"]["status"] == "internal_error" + assert transaction["spans"][0]["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert transaction["spans"][0]["data"][SPANDATA.GEN_AI_OPERATION_NAME] == 
"chat" + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio -async def test_exception_message_create_async(sentry_init, capture_items): +async def test_exception_message_create_async( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("event", "transaction") client = AsyncAnthropic(api_key="z") client.messages._post = AsyncMock( side_effect=AnthropicError("API rate limit reached") ) - with pytest.raises(AnthropicError): - await client.messages.create( - model="some-model", - messages=[{"role": "system", "content": "I'm throwing an exception"}], - max_tokens=1024, - ) - (event,) = (item.payload for item in items if item.type == "event") - assert event["level"] == "error" + if stream_gen_ai_spans: + items = capture_items("event", "transaction") + + with pytest.raises(AnthropicError): + await client.messages.create( + model="some-model", + messages=[{"role": "system", "content": "I'm throwing an exception"}], + max_tokens=1024, + ) + + (event,) = (item.payload for item in items if item.type == "event") + assert event["level"] == "error" + + (transaction,) = (item.payload for item in items if item.type == "transaction") + else: + events = capture_events() + + with pytest.raises(AnthropicError): + await client.messages.create( + model="some-model", + messages=[{"role": "system", "content": "I'm throwing an exception"}], + max_tokens=1024, + ) - (transaction,) = (item.payload for item in items if item.type == "transaction") + (event, transaction) = events + assert event["level"] == "error" assert transaction["contexts"]["trace"]["status"] == "internal_error" -def test_span_origin(sentry_init, capture_items): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_span_origin( + sentry_init, + capture_events, + capture_items, + 
stream_gen_ai_spans, +): sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -2311,25 +3396,45 @@ def test_span_origin(sentry_init, capture_items): } ] - with start_transaction(name="anthropic"): - client.messages.create(max_tokens=1024, messages=messages, model="model") + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with start_transaction(name="anthropic"): + client.messages.create(max_tokens=1024, messages=messages, model="model") + + (event,) = (item.payload for item in items if item.type == "transaction") + assert event["contexts"]["trace"]["origin"] == "manual" + + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["attributes"]["sentry.origin"] == "auto.ai.anthropic" + assert spans[0]["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert spans[0]["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + else: + events = capture_events() - (event,) = (item.payload for item in items if item.type == "transaction") - assert event["contexts"]["trace"]["origin"] == "manual" + with start_transaction(name="anthropic"): + client.messages.create(max_tokens=1024, messages=messages, model="model") - spans = [item.payload for item in items if item.type == "span"] - assert spans[0]["attributes"]["sentry.origin"] == "auto.ai.anthropic" - assert spans[0]["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert spans[0]["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + (event,) = events + assert event["contexts"]["trace"]["origin"] == "manual" + assert event["spans"][0]["origin"] == "auto.ai.anthropic" + assert event["spans"][0]["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert event["spans"][0]["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" 
+@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio -async def test_span_origin_async(sentry_init, capture_items): +async def test_span_origin_async( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") client = AsyncAnthropic(api_key="z") client.messages._post = AsyncMock(return_value=EXAMPLE_MESSAGE) @@ -2341,16 +3446,35 @@ async def test_span_origin_async(sentry_init, capture_items): } ] - with start_transaction(name="anthropic"): - await client.messages.create(max_tokens=1024, messages=messages, model="model") + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with start_transaction(name="anthropic"): + await client.messages.create( + max_tokens=1024, messages=messages, model="model" + ) + + (event,) = (item.payload for item in items if item.type == "transaction") + assert event["contexts"]["trace"]["origin"] == "manual" + + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["attributes"]["sentry.origin"] == "auto.ai.anthropic" + assert spans[0]["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert spans[0]["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + else: + events = capture_events() + + with start_transaction(name="anthropic"): + await client.messages.create( + max_tokens=1024, messages=messages, model="model" + ) - (event,) = (item.payload for item in items if item.type == "transaction") - assert event["contexts"]["trace"]["origin"] == "manual" + (event,) = events - spans = [item.payload for item in items if item.type == "span"] - assert spans[0]["attributes"]["sentry.origin"] == "auto.ai.anthropic" - assert spans[0]["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert spans[0]["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + 
assert event["contexts"]["trace"]["origin"] == "manual" + assert event["spans"][0]["origin"] == "auto.ai.anthropic" + assert event["spans"][0]["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert event["spans"][0]["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" @pytest.mark.skipif( @@ -2418,6 +3542,7 @@ def test_set_output_data_with_input_json_delta(sentry_init): # Test messages with mixed roles including "ai" that should be mapped to "assistant" +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( "test_message,expected_role", [ @@ -2434,15 +3559,20 @@ def test_set_output_data_with_input_json_delta(sentry_init): ], ) def test_anthropic_message_role_mapping( - sentry_init, capture_items, test_message, expected_role + sentry_init, + capture_events, + capture_items, + test_message, + expected_role, + stream_gen_ai_spans, ): """Test that Anthropic integration properly maps message roles like 'ai' to 'assistant'""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -2462,33 +3592,63 @@ def mock_messages_create(*args, **kwargs): test_messages = [test_message] - with start_transaction(name="anthropic tx"): - client.messages.create( - model="claude-3-opus", max_tokens=10, messages=test_messages + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with start_transaction(name="anthropic tx"): + client.messages.create( + model="claude-3-opus", max_tokens=10, messages=test_messages + ) + + span = next(item.payload for item in items if item.type == "span") + + # Verify that the span was created correctly + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + 
assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] + + # Parse the stored messages + stored_messages = json.loads( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] ) + else: + events = capture_events() + + with start_transaction(name="anthropic tx"): + client.messages.create( + model="claude-3-opus", max_tokens=10, messages=test_messages + ) - span = next(item.payload for item in items if item.type == "span") + (event,) = events + span = event["spans"][0] - # Verify that the span was created correctly - assert span["attributes"]["sentry.op"] == "gen_ai.chat" - assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] + # Verify that the span was created correctly + assert span["op"] == "gen_ai.chat" + assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] - # Parse the stored messages - stored_messages = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + # Parse the stored messages + stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert stored_messages[0]["role"] == expected_role -def test_anthropic_message_truncation(sentry_init, capture_items): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_anthropic_message_truncation( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """Test that large messages are truncated properly in Anthropic integration.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -2504,41 +3664,83 
@@ def test_anthropic_message_truncation(sentry_init, capture_items): {"role": "user", "content": "small message 5"}, ] - with start_transaction(): - client.messages.create(max_tokens=1024, messages=messages, model="model") + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - spans = [item.payload for item in items if item.type == "span"] - chat_spans = [ - span for span in spans if span["attributes"].get("sentry.op") == OP.GEN_AI_CHAT - ] - assert len(chat_spans) > 0 + with start_transaction(): + client.messages.create(max_tokens=1024, messages=messages, model="model") - chat_span = chat_spans[0] - assert chat_span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert chat_span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["attributes"] + spans = [item.payload for item in items if item.type == "span"] + chat_spans = [ + span + for span in spans + if span["attributes"].get("sentry.op") == OP.GEN_AI_CHAT + ] - messages_data = chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert isinstance(messages_data, str) + assert len(chat_spans) > 0 - parsed_messages = json.loads(messages_data) - assert isinstance(parsed_messages, list) - assert len(parsed_messages) == 1 - assert "small message 5" in str(parsed_messages[0]) + chat_span = chat_spans[0] + assert chat_span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert chat_span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["attributes"] + + messages_data = chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + + assert isinstance(messages_data, str) + + parsed_messages = json.loads(messages_data) + assert isinstance(parsed_messages, list) + assert len(parsed_messages) == 1 + assert "small message 5" in str(parsed_messages[0]) + + tx = next(item.payload for item in items if item.type == "transaction") + else: + events = capture_events() + + 
with start_transaction(): + client.messages.create(max_tokens=1024, messages=messages, model="model") + + assert len(events) > 0 + tx = events[0] + assert tx["type"] == "transaction" + + chat_spans = [ + span for span in tx.get("spans", []) if span.get("op") == OP.GEN_AI_CHAT + ] + + assert len(chat_spans) > 0 + + chat_span = chat_spans[0] + assert chat_span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert chat_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["data"] + + messages_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert isinstance(messages_data, str) + + parsed_messages = json.loads(messages_data) + assert isinstance(parsed_messages, list) + assert len(parsed_messages) == 1 + assert "small message 5" in str(parsed_messages[0]) - tx = next(item.payload for item in items if item.type == "transaction") assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5 +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio -async def test_anthropic_message_truncation_async(sentry_init, capture_items): +async def test_anthropic_message_truncation_async( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """Test that large messages are truncated properly in Anthropic integration.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") client = AsyncAnthropic(api_key="z") client.messages._post = mock.AsyncMock(return_value=EXAMPLE_MESSAGE) @@ -2554,21 +3756,44 @@ async def test_anthropic_message_truncation_async(sentry_init, capture_items): {"role": "user", "content": "small message 5"}, ] + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + else: + events = capture_events() + with start_transaction(): await 
client.messages.create(max_tokens=1024, messages=messages, model="model") - spans = [item.payload for item in items if item.type == "span"] - chat_spans = [ - span for span in spans if span["attributes"].get("sentry.op") == OP.GEN_AI_CHAT - ] + if stream_gen_ai_spans: + spans = [item.payload for item in items if item.type == "span"] + chat_spans = [ + span + for span in spans + if span["attributes"].get("sentry.op") == OP.GEN_AI_CHAT + ] + else: + assert len(events) > 0 + tx = events[0] + assert tx["type"] == "transaction" + + chat_spans = [ + span for span in tx.get("spans", []) if span.get("op") == OP.GEN_AI_CHAT + ] assert len(chat_spans) > 0 chat_span = chat_spans[0] - assert chat_span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert chat_span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["attributes"] + if stream_gen_ai_spans: + assert chat_span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert chat_span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["attributes"] - messages_data = chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + messages_data = chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + else: + assert chat_span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert chat_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["data"] + + messages_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] assert isinstance(messages_data, str) parsed_messages = json.loads(messages_data) @@ -2576,10 +3801,14 @@ async def test_anthropic_message_truncation_async(sentry_init, capture_items): assert len(parsed_messages) == 1 assert "small message 5" in str(parsed_messages[0]) - tx = next(item.payload for item in items if item.type == "transaction") + if stream_gen_ai_spans: + tx = next(item.payload for item in items if item.type == 
"transaction") + else: + pass assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5 +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( "send_default_pii, include_prompts", [ @@ -2590,15 +3819,21 @@ async def test_anthropic_message_truncation_async(sentry_init, capture_items): ], ) def test_nonstreaming_create_message_with_system_prompt( - sentry_init, capture_items, send_default_pii, include_prompts + sentry_init, + capture_events, + capture_items, + send_default_pii, + include_prompts, + stream_gen_ai_spans, ): """Test that system prompts are properly captured in GEN_AI_REQUEST_MESSAGES.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") + client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -2609,6 +3844,11 @@ def test_nonstreaming_create_message_with_system_prompt( } ] + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + else: + events = capture_events() + with start_transaction(name="anthropic"): response = client.messages.create( max_tokens=1024, @@ -2623,48 +3863,103 @@ def test_nonstreaming_create_message_with_system_prompt( assert usage.input_tokens == 10 assert usage.output_tokens == 20 - (event,) = (item.payload for item in items if item.type == "transaction") - assert event["transaction"] == "anthropic" + if stream_gen_ai_spans: + (event,) = (item.payload for item in items if item.type == "transaction") + assert event["transaction"] == "anthropic" - spans = [item.payload for item in items if item.type == "span"] - assert len(spans) == 1 - (span,) = spans + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (span,) = spans - assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT 
- assert span["name"] == "chat model" - assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" - if send_default_pii and include_prompts: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["attributes"] - system_instructions = json.loads( - span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] - ) - assert system_instructions == [ - {"type": "text", "content": "You are a helpful assistant."} - ] + if send_default_pii and include_prompts: + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["attributes"] + system_instructions = json.loads( + span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + ) + assert system_instructions == [ + {"type": "text", "content": "You are a helpful assistant."} + ] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] - stored_messages = json.loads( - span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - ) - assert len(stored_messages) == 1 - assert stored_messages[0]["role"] == "user" - assert stored_messages[0]["content"] == "Hello, Claude" - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi, I'm Claude." + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] + stored_messages = json.loads( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) + assert len(stored_messages) == 1 + assert stored_messages[0]["role"] == "user" + assert stored_messages[0]["content"] == "Hello, Claude" + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi, I'm Claude." 
+ ) + else: + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["attributes"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] + + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == [ + "end_turn" + ] else: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["attributes"] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] + assert len(events) == 1 + (event,) = events + + assert event["type"] == "transaction" + assert event["transaction"] == "anthropic" - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["end_turn"] + assert len(event["spans"]) == 1 + (span,) = event["spans"] + assert span["op"] == OP.GEN_AI_CHAT + assert span["description"] == "chat model" + assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + if send_default_pii and include_prompts: + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["data"] + system_instructions = json.loads( + span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + ) + assert system_instructions == [ + {"type": "text", "content": "You are a helpful assistant."} + ] + + if stream_gen_ai_spans: + assert 
SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] + stored_messages = json.loads( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) + else: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] + stored_messages = json.loads( + span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) + assert len(stored_messages) == 1 + assert stored_messages[0]["role"] == "user" + assert stored_messages[0]["content"] == "Hello, Claude" + assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi, I'm Claude." + else: + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False + assert span["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["end_turn"] + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio @pytest.mark.parametrize( "send_default_pii, include_prompts", @@ -2676,15 +3971,21 @@ def test_nonstreaming_create_message_with_system_prompt( ], ) async def test_nonstreaming_create_message_with_system_prompt_async( - sentry_init, capture_items, send_default_pii, include_prompts + sentry_init, + capture_events, + capture_items, + send_default_pii, + include_prompts, + stream_gen_ai_spans, ): """Test that system prompts are properly captured in GEN_AI_REQUEST_MESSAGES (async).""" sentry_init( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") + client = AsyncAnthropic(api_key="z") client.messages._post = AsyncMock(return_value=EXAMPLE_MESSAGE) @@ -2695,6 +3996,11 @@ 
async def test_nonstreaming_create_message_with_system_prompt_async( } ] + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + else: + events = capture_events() + with start_transaction(name="anthropic"): response = await client.messages.create( max_tokens=1024, @@ -2709,48 +4015,95 @@ async def test_nonstreaming_create_message_with_system_prompt_async( assert usage.input_tokens == 10 assert usage.output_tokens == 20 - (event,) = (item.payload for item in items if item.type == "transaction") - assert event["transaction"] == "anthropic" + if stream_gen_ai_spans: + (event,) = (item.payload for item in items if item.type == "transaction") + assert event["transaction"] == "anthropic" - spans = [item.payload for item in items if item.type == "span"] - assert len(spans) == 1 - (span,) = spans + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (span,) = spans - assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - assert span["name"] == "chat model" - assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" - if send_default_pii and include_prompts: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["attributes"] - system_instructions = json.loads( - span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] - ) - assert system_instructions == [ - {"type": "text", "content": "You are a helpful assistant."} - ] + if send_default_pii and include_prompts: + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["attributes"] + system_instructions = json.loads( + 
span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + ) + assert system_instructions == [ + {"type": "text", "content": "You are a helpful assistant."} + ] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] - stored_messages = json.loads( - span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - ) - assert len(stored_messages) == 1 - assert stored_messages[0]["role"] == "user" - assert stored_messages[0]["content"] == "Hello, Claude" - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi, I'm Claude." + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] + stored_messages = json.loads( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) + assert len(stored_messages) == 1 + assert stored_messages[0]["role"] == "user" + assert stored_messages[0]["content"] == "Hello, Claude" + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi, I'm Claude." + ) + else: + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["attributes"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] + + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == [ + "end_turn" + ] else: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["attributes"] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] + assert len(events) == 1 + (event,) = events + + assert event["type"] == "transaction" + assert event["transaction"] == "anthropic" + + assert len(event["spans"]) == 1 + (span,) = event["spans"] - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert 
span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["end_turn"] + assert span["op"] == OP.GEN_AI_CHAT + assert span["description"] == "chat model" + assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + if send_default_pii and include_prompts: + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["data"] + system_instructions = json.loads( + span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + ) + assert system_instructions == [ + {"type": "text", "content": "You are a helpful assistant."} + ] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] + stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + assert len(stored_messages) == 1 + assert stored_messages[0]["role"] == "user" + assert stored_messages[0]["content"] == "Hello, Claude" + assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi, I'm Claude." 
+ else: + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False + assert span["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["end_turn"] + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( "send_default_pii, include_prompts", [ @@ -2762,11 +4115,13 @@ async def test_nonstreaming_create_message_with_system_prompt_async( ) def test_streaming_create_message_with_system_prompt( sentry_init, + capture_events, capture_items, send_default_pii, include_prompts, get_model_response, server_side_event_chunks, + stream_gen_ai_spans, ): """Test that system prompts are properly captured in streaming mode.""" client = Anthropic(api_key="z") @@ -2812,8 +4167,8 @@ def test_streaming_create_message_with_system_prompt( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") messages = [ { @@ -2822,12 +4177,76 @@ def test_streaming_create_message_with_system_prompt( } ] - with mock.patch.object( - client._client, - "send", - return_value=response, - ) as _: - with start_transaction(name="anthropic"): + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"): + message = client.messages.create( + max_tokens=1024, + messages=messages, + model="model", + stream=True, + system="You are a helpful assistant.", + ) + + for _ in message: + 
pass + + (event,) = (item.payload for item in items if item.type == "transaction") + assert event["transaction"] == "anthropic" + + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (span,) = spans + + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + + if send_default_pii and include_prompts: + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["attributes"] + system_instructions = json.loads( + span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + ) + assert system_instructions == [ + {"type": "text", "content": "You are a helpful assistant."} + ] + + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] + stored_messages = json.loads( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) + assert len(stored_messages) == 1 + assert stored_messages[0]["role"] == "user" + assert stored_messages[0]["content"] == "Hello, Claude" + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" 
+ ) + + else: + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["attributes"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] + + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + + else: + events = capture_events() + + with mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"): message = client.messages.create( max_tokens=1024, messages=messages, @@ -2839,48 +4258,49 @@ def test_streaming_create_message_with_system_prompt( for _ in message: pass - (event,) = (item.payload for item in items if item.type == "transaction") - assert event["transaction"] == "anthropic" + assert len(events) == 1 + (event,) = events - spans = [item.payload for item in items if item.type == "span"] - assert len(spans) == 1 - (span,) = spans + assert event["type"] == "transaction" + assert event["transaction"] == "anthropic" - assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - assert span["name"] == "chat model" - assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert len(event["spans"]) == 1 + (span,) = event["spans"] - if send_default_pii and include_prompts: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["attributes"] - system_instructions = json.loads( - span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] - ) - assert system_instructions == [ - {"type": "text", "content": "You are a helpful assistant."} - ] + assert span["op"] == OP.GEN_AI_CHAT + assert span["description"] == "chat model" + assert 
span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] - stored_messages = json.loads( - span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - ) - assert len(stored_messages) == 1 - assert stored_messages[0]["role"] == "user" - assert stored_messages[0]["content"] == "Hello, Claude" - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" + if send_default_pii and include_prompts: + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["data"] + system_instructions = json.loads( + span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + ) + assert system_instructions == [ + {"type": "text", "content": "You are a helpful assistant."} + ] - else: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["attributes"] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] + stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + assert len(stored_messages) == 1 + assert stored_messages[0]["role"] == "user" + assert stored_messages[0]["content"] == "Hello, Claude" + assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" 
- assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + else: + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 + assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 + assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( "send_default_pii, include_prompts", [ @@ -2892,11 +4312,13 @@ def test_streaming_create_message_with_system_prompt( ) def test_stream_messages_with_system_prompt( sentry_init, + capture_events, capture_items, send_default_pii, include_prompts, get_model_response, server_side_event_chunks, + stream_gen_ai_spans, ): """Test that system prompts are properly captured in streaming mode.""" client = Anthropic(api_key="z") @@ -2942,8 +4364,8 @@ def test_stream_messages_with_system_prompt( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") messages = [ { @@ -2952,63 +4374,119 @@ def test_stream_messages_with_system_prompt( } ] - with mock.patch.object( - client._client, - "send", - return_value=response, - ) as _: - with start_transaction(name="anthropic"): - with client.messages.stream( - max_tokens=1024, - messages=messages, - model="model", - system="You are a helpful assistant.", - ) as stream: - for event in stream: - pass - - (event,) = (item.payload for item in items 
if item.type == "transaction") - assert event["transaction"] == "anthropic" + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - spans = [item.payload for item in items if item.type == "span"] - assert len(spans) == 1 - (span,) = spans + with mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"), client.messages.stream( + max_tokens=1024, + messages=messages, + model="model", + system="You are a helpful assistant.", + ) as stream: + for event in stream: + pass - assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - assert span["name"] == "chat model" - assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + (event,) = (item.payload for item in items if item.type == "transaction") + assert event["transaction"] == "anthropic" - if send_default_pii and include_prompts: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["attributes"] - system_instructions = json.loads( - span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] - ) - assert system_instructions == [ - {"type": "text", "content": "You are a helpful assistant."} - ] + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (span,) = spans - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] - stored_messages = json.loads( - span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - ) - assert len(stored_messages) == 1 - assert stored_messages[0]["role"] == "user" - assert stored_messages[0]["content"] == "Hello, Claude" - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" 
+ assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + if send_default_pii and include_prompts: + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["attributes"] + system_instructions = json.loads( + span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + ) + assert system_instructions == [ + {"type": "text", "content": "You are a helpful assistant."} + ] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] + stored_messages = json.loads( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) + assert len(stored_messages) == 1 + assert stored_messages[0]["role"] == "user" + assert stored_messages[0]["content"] == "Hello, Claude" + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" + ) + else: + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["attributes"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] + + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True else: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["attributes"] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] + events = capture_events() + + with mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"), client.messages.stream( + max_tokens=1024, + messages=messages, + model="model", + system="You are a helpful assistant.", + ) as stream: + for 
event in stream: + pass + + assert len(events) == 1 + (event,) = events - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert event["type"] == "transaction" + assert event["transaction"] == "anthropic" + assert len(event["spans"]) == 1 + (span,) = event["spans"] + assert span["op"] == OP.GEN_AI_CHAT + assert span["description"] == "chat model" + assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + + if send_default_pii and include_prompts: + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["data"] + system_instructions = json.loads( + span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + ) + assert system_instructions == [ + {"type": "text", "content": "You are a helpful assistant."} + ] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] + stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + assert len(stored_messages) == 1 + assert stored_messages[0]["role"] == "user" + assert stored_messages[0]["content"] == "Hello, Claude" + assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" 
+ else: + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 + assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 + assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio @pytest.mark.parametrize( "send_default_pii, include_prompts", @@ -3021,12 +4499,14 @@ def test_stream_messages_with_system_prompt( ) async def test_stream_message_with_system_prompt_async( sentry_init, + capture_events, capture_items, send_default_pii, include_prompts, get_model_response, async_iterator, server_side_event_chunks, + stream_gen_ai_spans, ): """Test that system prompts are properly captured in streaming mode (async).""" client = AsyncAnthropic(api_key="z") @@ -3074,8 +4554,8 @@ async def test_stream_message_with_system_prompt_async( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") messages = [ { @@ -3084,63 +4564,111 @@ async def test_stream_message_with_system_prompt_async( } ] + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + else: + events = capture_events() + with mock.patch.object( client._client, "send", return_value=response, - ) as _: - with start_transaction(name="anthropic"): - async with client.messages.stream( - max_tokens=1024, - messages=messages, - model="model", - system="You are a helpful assistant.", - ) as stream: - async for event in stream: - pass - - (event,) = (item.payload for item in items if item.type == "transaction") - assert event["transaction"] == "anthropic" + ) as _, 
start_transaction(name="anthropic"): + async with client.messages.stream( + max_tokens=1024, + messages=messages, + model="model", + system="You are a helpful assistant.", + ) as stream: + async for event in stream: + pass - spans = [item.payload for item in items if item.type == "span"] - assert len(spans) == 1 - (span,) = spans + if stream_gen_ai_spans: + (event,) = (item.payload for item in items if item.type == "transaction") + assert event["transaction"] == "anthropic" - assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - assert span["name"] == "chat model" - assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (span,) = spans - if send_default_pii and include_prompts: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["attributes"] - system_instructions = json.loads( - span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] - ) - assert system_instructions == [ - {"type": "text", "content": "You are a helpful assistant."} - ] + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] - stored_messages = json.loads( - span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - ) - assert len(stored_messages) == 1 - assert stored_messages[0]["role"] == "user" - assert stored_messages[0]["content"] == "Hello, Claude" - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" 
+ if send_default_pii and include_prompts: + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["attributes"] + system_instructions = json.loads( + span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + ) + assert system_instructions == [ + {"type": "text", "content": "You are a helpful assistant."} + ] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] + stored_messages = json.loads( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) + assert len(stored_messages) == 1 + assert stored_messages[0]["role"] == "user" + assert stored_messages[0]["content"] == "Hello, Claude" + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" + ) + else: + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["attributes"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] + + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True else: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["attributes"] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] + assert len(events) == 1 + (event,) = events + + assert event["type"] == "transaction" + assert event["transaction"] == "anthropic" + + assert len(event["spans"]) == 1 + (span,) = event["spans"] + + assert span["op"] == OP.GEN_AI_CHAT + assert span["description"] == "chat model" + assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert 
span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + if send_default_pii and include_prompts: + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["data"] + system_instructions = json.loads( + span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + ) + assert system_instructions == [ + {"type": "text", "content": "You are a helpful assistant."} + ] + + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] + stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + assert len(stored_messages) == 1 + assert stored_messages[0]["role"] == "user" + assert stored_messages[0]["content"] == "Hello, Claude" + assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" + + else: + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 + assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 + assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio @pytest.mark.parametrize( "send_default_pii, include_prompts", @@ -3153,12 +4681,14 @@ async def test_stream_message_with_system_prompt_async( ) async def test_streaming_create_message_with_system_prompt_async( sentry_init, + capture_events, capture_items, send_default_pii, include_prompts, get_model_response, async_iterator, server_side_event_chunks, + stream_gen_ai_spans, ): """Test that system prompts are properly captured in streaming mode (async).""" client = AsyncAnthropic(api_key="z") @@ -3206,8 +4736,8 @@ async def test_streaming_create_message_with_system_prompt_async( 
integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") messages = [ { @@ -3216,12 +4746,14 @@ async def test_streaming_create_message_with_system_prompt_async( } ] - with mock.patch.object( - client._client, - "send", - return_value=response, - ) as _: - with start_transaction(name="anthropic"): + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"): message = await client.messages.create( max_tokens=1024, messages=messages, @@ -3232,57 +4764,126 @@ async def test_streaming_create_message_with_system_prompt_async( async for _ in message: pass + else: + events = capture_events() + + with mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"): + message = await client.messages.create( + max_tokens=1024, + messages=messages, + model="model", + stream=True, + system="You are a helpful assistant.", + ) - (event,) = (item.payload for item in items if item.type == "transaction") - assert event["transaction"] == "anthropic" + async for _ in message: + pass - spans = [item.payload for item in items if item.type == "span"] - assert len(spans) == 1 - (span,) = spans + if stream_gen_ai_spans: + (event,) = (item.payload for item in items if item.type == "transaction") + assert event["transaction"] == "anthropic" - assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - assert span["name"] == "chat model" - assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + spans = [item.payload for item in items if item.type == "span"] + assert 
len(spans) == 1 + (span,) = spans - if send_default_pii and include_prompts: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["attributes"] - system_instructions = json.loads( - span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] - ) - assert system_instructions == [ - {"type": "text", "content": "You are a helpful assistant."} - ] + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] - stored_messages = json.loads( - span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - ) - assert len(stored_messages) == 1 - assert stored_messages[0]["role"] == "user" - assert stored_messages[0]["content"] == "Hello, Claude" - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" + if send_default_pii and include_prompts: + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["attributes"] + system_instructions = json.loads( + span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + ) + assert system_instructions == [ + {"type": "text", "content": "You are a helpful assistant."} + ] + + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] + stored_messages = json.loads( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) + assert len(stored_messages) == 1 + assert stored_messages[0]["role"] == "user" + assert stored_messages[0]["content"] == "Hello, Claude" + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" 
+ ) + + else: + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["attributes"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] + + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True else: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["attributes"] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] + assert len(events) == 1 + (event,) = events - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert event["type"] == "transaction" + assert event["transaction"] == "anthropic" + assert len(event["spans"]) == 1 + (span,) = event["spans"] -def test_system_prompt_with_complex_structure(sentry_init, capture_items): + assert span["op"] == OP.GEN_AI_CHAT + assert span["description"] == "chat model" + assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + + if send_default_pii and include_prompts: + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["data"] + system_instructions = json.loads( + span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + ) + assert system_instructions == [ + {"type": "text", "content": "You are a helpful assistant."} + ] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] + stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + assert 
len(stored_messages) == 1 + assert stored_messages[0]["role"] == "user" + assert stored_messages[0]["content"] == "Hello, Claude" + assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" + + else: + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 + assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 + assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_system_prompt_with_complex_structure( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """Test that complex system prompt structures (list of text blocks) are properly captured.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") + client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -3299,34 +4900,72 @@ def test_system_prompt_with_complex_structure(sentry_init, capture_items): } ] - with start_transaction(name="anthropic"): - response = client.messages.create( - max_tokens=1024, messages=messages, model="model", system=system_prompt + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with start_transaction(name="anthropic"): + response = client.messages.create( + max_tokens=1024, messages=messages, model="model", system=system_prompt + ) + + assert response == EXAMPLE_MESSAGE + + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (span,) = spans + + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert 
span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["attributes"] + system_instructions = json.loads( + span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] ) - assert response == EXAMPLE_MESSAGE + # System content should be a list of text blocks + assert isinstance(system_instructions, list) + assert system_instructions == [ + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] + stored_messages = json.loads( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) + else: + events = capture_events() - spans = [item.payload for item in items if item.type == "span"] - assert len(spans) == 1 - (span,) = spans + with start_transaction(name="anthropic"): + response = client.messages.create( + max_tokens=1024, messages=messages, model="model", system=system_prompt + ) - assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert response == EXAMPLE_MESSAGE - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["attributes"] - system_instructions = json.loads( - span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] - ) + assert len(events) == 1 + (event,) = events - # System content should be a list of text blocks - assert isinstance(system_instructions, list) - assert system_instructions == [ - {"type": "text", "content": "You are a helpful assistant."}, - {"type": "text", "content": "Be concise and clear."}, - ] + assert len(event["spans"]) == 1 + (span,) = event["spans"] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] - stored_messages = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + + assert 
SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["data"] + system_instructions = json.loads( + span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + ) + + # System content should be a list of text blocks + assert isinstance(system_instructions, list) + assert system_instructions == [ + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] + stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(stored_messages) == 1 assert stored_messages[0]["role"] == "user" @@ -3528,14 +5167,21 @@ def test_transform_message_content_list_anthropic(): # Integration tests for binary data in messages -def test_message_with_base64_image(sentry_init, capture_items): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_message_with_base64_image( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """Test that messages with base64 images are properly captured.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") + client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -3556,14 +5202,31 @@ def test_message_with_base64_image(sentry_init, capture_items): } ] - with start_transaction(name="anthropic"): - client.messages.create(max_tokens=1024, messages=messages, model="model") + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with start_transaction(name="anthropic"): + client.messages.create(max_tokens=1024, messages=messages, model="model") + + spans = [item.payload for item in items if item.type == "span"] + (span,) = spans + + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] + stored_messages = json.loads( + 
span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) + else: + events = capture_events() + + with start_transaction(name="anthropic"): + client.messages.create(max_tokens=1024, messages=messages, model="model") - spans = [item.payload for item in items if item.type == "span"] - (span,) = spans + assert len(events) == 1 + (event,) = events + (span,) = event["spans"] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] - stored_messages = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] + stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(stored_messages) == 1 assert stored_messages[0]["role"] == "user" @@ -3578,14 +5241,21 @@ def test_message_with_base64_image(sentry_init, capture_items): } -def test_message_with_url_image(sentry_init, capture_items): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_message_with_url_image( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """Test that messages with URL-referenced images are properly captured.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") + client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -3605,13 +5275,30 @@ def test_message_with_url_image(sentry_init, capture_items): } ] - with start_transaction(name="anthropic"): - client.messages.create(max_tokens=1024, messages=messages, model="model") + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with start_transaction(name="anthropic"): + client.messages.create(max_tokens=1024, messages=messages, model="model") + + spans = [item.payload for item in items if item.type == "span"] + (span,) = spans + + stored_messages = json.loads( + 
span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) + else: + events = capture_events() - spans = [item.payload for item in items if item.type == "span"] - (span,) = spans + with start_transaction(name="anthropic"): + client.messages.create(max_tokens=1024, messages=messages, model="model") + + assert len(events) == 1 + (event,) = events + (span,) = event["spans"] + + stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) - stored_messages = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) content = stored_messages[0]["content"] assert content[1] == { "type": "uri", @@ -3621,14 +5308,21 @@ def test_message_with_url_image(sentry_init, capture_items): } -def test_message_with_file_image(sentry_init, capture_items): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_message_with_file_image( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """Test that messages with file_id-referenced images are properly captured.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") + client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -3649,13 +5343,30 @@ def test_message_with_file_image(sentry_init, capture_items): } ] - with start_transaction(name="anthropic"): - client.messages.create(max_tokens=1024, messages=messages, model="model") + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with start_transaction(name="anthropic"): + client.messages.create(max_tokens=1024, messages=messages, model="model") + + spans = [item.payload for item in items if item.type == "span"] + (span,) = spans + + stored_messages = json.loads( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) + else: + events = capture_events() - spans = [item.payload for 
item in items if item.type == "span"] - (span,) = spans + with start_transaction(name="anthropic"): + client.messages.create(max_tokens=1024, messages=messages, model="model") + + assert len(events) == 1 + (event,) = events + (span,) = event["spans"] + + stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) - stored_messages = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) content = stored_messages[0]["content"] assert content[1] == { "type": "file", @@ -3665,14 +5376,21 @@ def test_message_with_file_image(sentry_init, capture_items): } -def test_message_with_base64_pdf(sentry_init, capture_items): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_message_with_base64_pdf( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """Test that messages with base64-encoded PDF documents are properly captured.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") + client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -3693,13 +5411,30 @@ def test_message_with_base64_pdf(sentry_init, capture_items): } ] - with start_transaction(name="anthropic"): - client.messages.create(max_tokens=1024, messages=messages, model="model") + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with start_transaction(name="anthropic"): + client.messages.create(max_tokens=1024, messages=messages, model="model") + + spans = [item.payload for item in items if item.type == "span"] + (span,) = spans + + stored_messages = json.loads( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) + else: + events = capture_events() - spans = [item.payload for item in items if item.type == "span"] - (span,) = spans + with start_transaction(name="anthropic"): + 
client.messages.create(max_tokens=1024, messages=messages, model="model") + + assert len(events) == 1 + (event,) = events + (span,) = event["spans"] + + stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) - stored_messages = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) content = stored_messages[0]["content"] assert content[1] == { "type": "blob", @@ -3709,14 +5444,21 @@ def test_message_with_base64_pdf(sentry_init, capture_items): } -def test_message_with_url_pdf(sentry_init, capture_items): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_message_with_url_pdf( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """Test that messages with URL-referenced PDF documents are properly captured.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") + client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -3736,13 +5478,30 @@ def test_message_with_url_pdf(sentry_init, capture_items): } ] - with start_transaction(name="anthropic"): - client.messages.create(max_tokens=1024, messages=messages, model="model") + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with start_transaction(name="anthropic"): + client.messages.create(max_tokens=1024, messages=messages, model="model") + + spans = [item.payload for item in items if item.type == "span"] + (span,) = spans + + stored_messages = json.loads( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) + else: + events = capture_events() - spans = [item.payload for item in items if item.type == "span"] - (span,) = spans + with start_transaction(name="anthropic"): + client.messages.create(max_tokens=1024, messages=messages, model="model") + + assert len(events) == 1 + (event,) = events + 
(span,) = event["spans"] + + stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) - stored_messages = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) content = stored_messages[0]["content"] assert content[1] == { "type": "uri", @@ -3752,14 +5511,21 @@ def test_message_with_url_pdf(sentry_init, capture_items): } -def test_message_with_file_document(sentry_init, capture_items): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_message_with_file_document( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """Test that messages with file_id-referenced documents are properly captured.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") + client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -3780,13 +5546,30 @@ def test_message_with_file_document(sentry_init, capture_items): } ] - with start_transaction(name="anthropic"): - client.messages.create(max_tokens=1024, messages=messages, model="model") + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with start_transaction(name="anthropic"): + client.messages.create(max_tokens=1024, messages=messages, model="model") + + spans = [item.payload for item in items if item.type == "span"] + (span,) = spans + + stored_messages = json.loads( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) + else: + events = capture_events() - spans = [item.payload for item in items if item.type == "span"] - (span,) = spans + with start_transaction(name="anthropic"): + client.messages.create(max_tokens=1024, messages=messages, model="model") + + assert len(events) == 1 + (event,) = events + (span,) = event["spans"] + + stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) - 
stored_messages = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) content = stored_messages[0]["content"] assert content[1] == { "type": "file", @@ -3796,14 +5579,21 @@ def test_message_with_file_document(sentry_init, capture_items): } -def test_message_with_mixed_content(sentry_init, capture_items): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_message_with_mixed_content( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """Test that messages with mixed content (text, images, documents) are properly captured.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") + client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -3840,13 +5630,30 @@ def test_message_with_mixed_content(sentry_init, capture_items): } ] - with start_transaction(name="anthropic"): - client.messages.create(max_tokens=1024, messages=messages, model="model") + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with start_transaction(name="anthropic"): + client.messages.create(max_tokens=1024, messages=messages, model="model") + + spans = [item.payload for item in items if item.type == "span"] + (span,) = spans + + stored_messages = json.loads( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) + else: + events = capture_events() - spans = [item.payload for item in items if item.type == "span"] - (span,) = spans + with start_transaction(name="anthropic"): + client.messages.create(max_tokens=1024, messages=messages, model="model") + + assert len(events) == 1 + (event,) = events + (span,) = event["spans"] + + stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) - stored_messages = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) content = 
stored_messages[0]["content"] assert len(content) == 5 @@ -3878,14 +5685,21 @@ def test_message_with_mixed_content(sentry_init, capture_items): } -def test_message_with_multiple_images_different_formats(sentry_init, capture_items): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_message_with_multiple_images_different_formats( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """Test that messages with multiple images of different source types are handled.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") + client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -3921,13 +5735,30 @@ def test_message_with_multiple_images_different_formats(sentry_init, capture_ite } ] - with start_transaction(name="anthropic"): - client.messages.create(max_tokens=1024, messages=messages, model="model") + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with start_transaction(name="anthropic"): + client.messages.create(max_tokens=1024, messages=messages, model="model") + + spans = [item.payload for item in items if item.type == "span"] + (span,) = spans + + stored_messages = json.loads( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) + else: + events = capture_events() - spans = [item.payload for item in items if item.type == "span"] - (span,) = spans + with start_transaction(name="anthropic"): + client.messages.create(max_tokens=1024, messages=messages, model="model") + + assert len(events) == 1 + (event,) = events + (span,) = event["spans"] + + stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) - stored_messages = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) content = stored_messages[0]["content"] assert len(content) == 4 @@ 
-3952,14 +5783,21 @@ def test_message_with_multiple_images_different_formats(sentry_init, capture_ite assert content[3] == {"type": "text", "text": "Compare these three images."} -def test_binary_content_not_stored_when_pii_disabled(sentry_init, capture_items): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_binary_content_not_stored_when_pii_disabled( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """Test that binary content is not stored when send_default_pii is False.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=False, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") + client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -3980,24 +5818,46 @@ def test_binary_content_not_stored_when_pii_disabled(sentry_init, capture_items) } ] - with start_transaction(name="anthropic"): - client.messages.create(max_tokens=1024, messages=messages, model="model") + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with start_transaction(name="anthropic"): + client.messages.create(max_tokens=1024, messages=messages, model="model") + + spans = [item.payload for item in items if item.type == "span"] + (span,) = spans + + # Messages should not be stored + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + else: + events = capture_events() + + with start_transaction(name="anthropic"): + client.messages.create(max_tokens=1024, messages=messages, model="model") - spans = [item.payload for item in items if item.type == "span"] - (span,) = spans + assert len(events) == 1 + (event,) = events + (span,) = event["spans"] - # Messages should not be stored - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + # Messages should not be stored + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] -def 
test_binary_content_not_stored_when_prompts_disabled(sentry_init, capture_items): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_binary_content_not_stored_when_prompts_disabled( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """Test that binary content is not stored when include_prompts is False.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") + client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -4018,23 +5878,45 @@ def test_binary_content_not_stored_when_prompts_disabled(sentry_init, capture_it } ] - with start_transaction(name="anthropic"): - client.messages.create(max_tokens=1024, messages=messages, model="model") + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with start_transaction(name="anthropic"): + client.messages.create(max_tokens=1024, messages=messages, model="model") + + spans = [item.payload for item in items if item.type == "span"] + (span,) = spans + + # Messages should not be stored + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + else: + events = capture_events() + + with start_transaction(name="anthropic"): + client.messages.create(max_tokens=1024, messages=messages, model="model") - spans = [item.payload for item in items if item.type == "span"] - (span,) = spans + assert len(events) == 1 + (event,) = events + (span,) = event["spans"] - # Messages should not be stored - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + # Messages should not be stored + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] -def test_cache_tokens_nonstreaming(sentry_init, capture_items): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_cache_tokens_nonstreaming( + sentry_init, + 
capture_events, + capture_items, + stream_gen_ai_spans, +): """Test cache read/write tokens are tracked for non-streaming responses.""" sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") + client = Anthropic(api_key="z") client.messages._post = mock.Mock( @@ -4053,23 +5935,49 @@ def test_cache_tokens_nonstreaming(sentry_init, capture_items): ) ) - with start_transaction(name="anthropic"): - client.messages.create( - max_tokens=1024, - messages=[{"role": "user", "content": "Hello"}], - model="claude-3-5-sonnet-20241022", - ) + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with start_transaction(name="anthropic"): + client.messages.create( + max_tokens=1024, + messages=[{"role": "user", "content": "Hello"}], + model="claude-3-5-sonnet-20241022", + ) + + (span,) = (item.payload for item in items if item.type == "span") + # input_tokens normalized: 100 + 80 (cache_read) + 20 (cache_write) = 200 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 200 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 50 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 250 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 80 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 20 + else: + events = capture_events() + + with start_transaction(name="anthropic"): + client.messages.create( + max_tokens=1024, + messages=[{"role": "user", "content": "Hello"}], + model="claude-3-5-sonnet-20241022", + ) - (span,) = (item.payload for item in items if item.type == "span") - # input_tokens normalized: 100 + 80 (cache_read) + 20 (cache_write) = 200 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 200 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 50 - assert 
span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 250 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 80 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 20 + (span,) = events[0]["spans"] + # input_tokens normalized: 100 + 80 (cache_read) + 20 (cache_write) = 200 + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 200 + assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 50 + assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 250 + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 80 + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 20 -def test_input_tokens_include_cache_write_nonstreaming(sentry_init, capture_items): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_input_tokens_include_cache_write_nonstreaming( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """ Test that gen_ai.usage.input_tokens includes cache_write tokens (non-streaming). 
@@ -4084,8 +5992,9 @@ def test_input_tokens_include_cache_write_nonstreaming(sentry_init, capture_item sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") + client = Anthropic(api_key="z") client.messages._post = mock.Mock( @@ -4104,23 +6013,53 @@ def test_input_tokens_include_cache_write_nonstreaming(sentry_init, capture_item ) ) - with start_transaction(name="anthropic"): - client.messages.create( - max_tokens=1024, - messages=[{"role": "user", "content": "What is 3+3?"}], - model="claude-sonnet-4-20250514", + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with start_transaction(name="anthropic"): + client.messages.create( + max_tokens=1024, + messages=[{"role": "user", "content": "What is 3+3?"}], + model="claude-sonnet-4-20250514", + ) + + (span,) = (item.payload for item in items if item.type == "span") + + # input_tokens should be total: 19 (non-cached) + 2846 (cache_write) = 2865 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 2865 + assert ( + span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 2879 + ) # 2865 + 14 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 0 + assert ( + span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 2846 ) + else: + events = capture_events() + + with start_transaction(name="anthropic"): + client.messages.create( + max_tokens=1024, + messages=[{"role": "user", "content": "What is 3+3?"}], + model="claude-sonnet-4-20250514", + ) - (span,) = (item.payload for item in items if item.type == "span") + (span,) = events[0]["spans"] - # input_tokens should be total: 19 (non-cached) + 2846 (cache_write) = 2865 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 2865 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 2879 # 2865 + 14 - assert 
span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 0 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 2846 + # input_tokens should be total: 19 (non-cached) + 2846 (cache_write) = 2865 + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 2865 + assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 2879 # 2865 + 14 + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 0 + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 2846 -def test_input_tokens_include_cache_read_nonstreaming(sentry_init, capture_items): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_input_tokens_include_cache_read_nonstreaming( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """ Test that gen_ai.usage.input_tokens includes cache_read tokens (non-streaming). @@ -4135,8 +6074,9 @@ def test_input_tokens_include_cache_read_nonstreaming(sentry_init, capture_items sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") + client = Anthropic(api_key="z") client.messages._post = mock.Mock( @@ -4155,27 +6095,52 @@ def test_input_tokens_include_cache_read_nonstreaming(sentry_init, capture_items ) ) - with start_transaction(name="anthropic"): - client.messages.create( - max_tokens=1024, - messages=[{"role": "user", "content": "What is 5+5?"}], - model="claude-sonnet-4-20250514", - ) + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with start_transaction(name="anthropic"): + client.messages.create( + max_tokens=1024, + messages=[{"role": "user", "content": "What is 5+5?"}], + model="claude-sonnet-4-20250514", + ) + + (span,) = [item.payload for item in items if item.type == "span"] + + # input_tokens should be total: 19 (non-cached) + 2846 (cache_read) = 2865 + assert 
span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 2865 + assert ( + span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 2879 + ) # 2865 + 14 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 2846 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 0 + else: + events = capture_events() + + with start_transaction(name="anthropic"): + client.messages.create( + max_tokens=1024, + messages=[{"role": "user", "content": "What is 5+5?"}], + model="claude-sonnet-4-20250514", + ) - (span,) = [item.payload for item in items if item.type == "span"] + (span,) = events[0]["spans"] - # input_tokens should be total: 19 (non-cached) + 2846 (cache_read) = 2865 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 2865 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 2879 # 2865 + 14 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 2846 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 0 + # input_tokens should be total: 19 (non-cached) + 2846 (cache_read) = 2865 + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 2865 + assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 2879 # 2865 + 14 + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 2846 + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 0 +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_input_tokens_include_cache_read_streaming( sentry_init, + capture_events, capture_items, get_model_response, server_side_event_chunks, + stream_gen_ai_spans, ): """ Test that gen_ai.usage.input_tokens includes cache_read tokens (streaming). 
@@ -4215,15 +6180,42 @@ def test_input_tokens_include_cache_read_streaming( sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") - with mock.patch.object( - client._client, - "send", - return_value=response, - ) as _: - with start_transaction(name="anthropic"): + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"): + for _ in client.messages.create( + max_tokens=1024, + messages=[{"role": "user", "content": "What is 5+5?"}], + model="claude-sonnet-4-20250514", + stream=True, + ): + pass + + (span,) = (item.payload for item in items if item.type == "span") + + # input_tokens should be total: 19 + 2846 = test_stream_messages_input_tokens_include_cache_read_streaming + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 2865 + assert ( + span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 2879 + ) # 2865 + 14 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 2846 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 0 + else: + events = capture_events() + + with mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"): for _ in client.messages.create( max_tokens=1024, messages=[{"role": "user", "content": "What is 5+5?"}], @@ -4232,20 +6224,23 @@ def test_input_tokens_include_cache_read_streaming( ): pass - (span,) = (item.payload for item in items if item.type == "span") + (span,) = events[0]["spans"] - # input_tokens should be total: 19 + 2846 = test_stream_messages_input_tokens_include_cache_read_streaming - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 2865 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 2879 # 2865 + 14 - 
assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 2846 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 0 + # input_tokens should be total: 19 + 2846 = test_stream_messages_input_tokens_include_cache_read_streaming + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 2865 + assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 2879 # 2865 + 14 + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 2846 + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 0 +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_stream_messages_input_tokens_include_cache_read_streaming( sentry_init, + capture_events, capture_items, get_model_response, server_side_event_chunks, + stream_gen_ai_spans, ): """ Test that gen_ai.usage.input_tokens includes cache_read tokens (streaming). @@ -4284,33 +6279,64 @@ def test_stream_messages_input_tokens_include_cache_read_streaming( sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") - with mock.patch.object( - client._client, - "send", - return_value=response, - ) as _: - with start_transaction(name="anthropic"): - with client.messages.stream( - max_tokens=1024, - messages=[{"role": "user", "content": "What is 5+5?"}], - model="claude-sonnet-4-20250514", - ) as stream: - for event in stream: - pass + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"), client.messages.stream( + max_tokens=1024, + messages=[{"role": "user", "content": "What is 5+5?"}], + model="claude-sonnet-4-20250514", + ) as stream: + for event in stream: + pass + + (span,) = (item.payload for item in items if item.type == "span") + + # input_tokens should be total: 19 + 2846 = 2865 + 
assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 2865 + assert ( + span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 2879 + ) # 2865 + 14 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 2846 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 0 + else: + events = capture_events() + + with mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"), client.messages.stream( + max_tokens=1024, + messages=[{"role": "user", "content": "What is 5+5?"}], + model="claude-sonnet-4-20250514", + ) as stream: + for event in stream: + pass - (span,) = (item.payload for item in items if item.type == "span") + (span,) = events[0]["spans"] - # input_tokens should be total: 19 + 2846 = 2865 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 2865 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 2879 # 2865 + 14 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 2846 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 0 + # input_tokens should be total: 19 + 2846 = 2865 + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 2865 + assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 2879 # 2865 + 14 + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 2846 + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 0 -def test_input_tokens_unchanged_without_caching(sentry_init, capture_items): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_input_tokens_unchanged_without_caching( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """ Test that input_tokens is unchanged when there are no cached tokens. 
@@ -4320,8 +6346,9 @@ def test_input_tokens_unchanged_without_caching(sentry_init, capture_items): sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") + client = Anthropic(api_key="z") client.messages._post = mock.Mock( @@ -4338,24 +6365,44 @@ def test_input_tokens_unchanged_without_caching(sentry_init, capture_items): ) ) - with start_transaction(name="anthropic"): - client.messages.create( - max_tokens=1024, - messages=[{"role": "user", "content": "What is 2+2?"}], - model="claude-sonnet-4-20250514", - ) + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with start_transaction(name="anthropic"): + client.messages.create( + max_tokens=1024, + messages=[{"role": "user", "content": "What is 2+2?"}], + model="claude-sonnet-4-20250514", + ) + + (span,) = (item.payload for item in items if item.type == "span") + + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 32 # 20 + 12 + else: + events = capture_events() - (span,) = (item.payload for item in items if item.type == "span") + with start_transaction(name="anthropic"): + client.messages.create( + max_tokens=1024, + messages=[{"role": "user", "content": "What is 2+2?"}], + model="claude-sonnet-4-20250514", + ) + + (span,) = events[0]["spans"] - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 20 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 32 # 20 + 12 + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 20 + assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 32 # 20 + 12 +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_cache_tokens_streaming( sentry_init, + capture_events, capture_items, get_model_response, server_side_event_chunks, + stream_gen_ai_spans, ): """Test cache tokens are tracked for 
streaming responses.""" client = Anthropic(api_key="z") @@ -4391,15 +6438,40 @@ def test_cache_tokens_streaming( sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") - with mock.patch.object( - client._client, - "send", - return_value=response, - ) as _: - with start_transaction(name="anthropic"): + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"): + for _ in client.messages.create( + max_tokens=1024, + messages=[{"role": "user", "content": "Hello"}], + model="claude-3-5-sonnet-20241022", + stream=True, + ): + pass + + (span,) = (item.payload for item in items if item.type == "span") + # input_tokens normalized: 100 + 80 (cache_read) + 20 (cache_write) = 200 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 200 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 210 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 80 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 20 + else: + events = capture_events() + + with mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"): for _ in client.messages.create( max_tokens=1024, messages=[{"role": "user", "content": "Hello"}], @@ -4408,17 +6480,23 @@ def test_cache_tokens_streaming( ): pass - (span,) = (item.payload for item in items if item.type == "span") - # input_tokens normalized: 100 + 80 (cache_read) + 20 (cache_write) = 200 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 200 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] 
== 210 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 80 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 20 + (span,) = events[0]["spans"] + # input_tokens normalized: 100 + 80 (cache_read) + 20 (cache_write) = 200 + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 200 + assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 + assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 210 + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 80 + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 20 +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_stream_messages_cache_tokens( - sentry_init, capture_items, get_model_response, server_side_event_chunks + sentry_init, + capture_events, + capture_items, + get_model_response, + server_side_event_chunks, + stream_gen_ai_spans, ): """Test cache tokens are tracked for streaming responses.""" client = Anthropic(api_key="z") @@ -4454,27 +6532,50 @@ def test_stream_messages_cache_tokens( sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") - with mock.patch.object( - client._client, - "send", - return_value=response, - ) as _: - with start_transaction(name="anthropic"): - with client.messages.stream( - max_tokens=1024, - messages=[{"role": "user", "content": "Hello"}], - model="claude-3-5-sonnet-20241022", - ) as stream: - for event in stream: - pass + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"), client.messages.stream( + max_tokens=1024, + messages=[{"role": "user", "content": "Hello"}], + model="claude-3-5-sonnet-20241022", + ) as stream: + for event in stream: + pass + + (span,) = (item.payload for item in 
items if item.type == "span") + # input_tokens normalized: 100 + 80 (cache_read) + 20 (cache_write) = 200 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 200 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 210 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 80 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 20 + else: + events = capture_events() + + with mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"), client.messages.stream( + max_tokens=1024, + messages=[{"role": "user", "content": "Hello"}], + model="claude-3-5-sonnet-20241022", + ) as stream: + for event in stream: + pass - (span,) = (item.payload for item in items if item.type == "span") - # input_tokens normalized: 100 + 80 (cache_read) + 20 (cache_write) = 200 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 200 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 210 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 80 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 20 + (span,) = events[0]["spans"] + # input_tokens normalized: 100 + 80 (cache_read) + 20 (cache_write) = 200 + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 200 + assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 + assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 210 + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 80 + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 20 diff --git a/tests/integrations/google_genai/test_google_genai.py b/tests/integrations/google_genai/test_google_genai.py index 3974041314..3cc4b42bb2 100644 --- a/tests/integrations/google_genai/test_google_genai.py 
+++ b/tests/integrations/google_genai/test_google_genai.py @@ -114,6 +114,7 @@ def create_test_config( return genai_types.GenerateContentConfig(**config_dict) +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( "send_default_pii, include_prompts", [ @@ -124,61 +125,123 @@ def create_test_config( ], ) def test_nonstreaming_generate_content( - sentry_init, capture_items, send_default_pii, include_prompts, mock_genai_client + sentry_init, + capture_events, + capture_items, + send_default_pii, + include_prompts, + mock_genai_client, + stream_gen_ai_spans, ): sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") # Mock the HTTP response at the _api_client.request() level mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) - with mock.patch.object( - mock_genai_client._api_client, - "request", - return_value=mock_http_response, - ): - with start_transaction(name="google_genai"): + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with mock.patch.object( + mock_genai_client._api_client, + "request", + return_value=mock_http_response, + ), start_transaction(name="google_genai"): config = create_test_config(temperature=0.7, max_output_tokens=100) mock_genai_client.models.generate_content( model="gemini-1.5-flash", contents="Tell me a joke", config=config ) - (event,) = (item.payload for item in items if item.type == "transaction") - assert event["transaction"] == "google_genai" + (event,) = (item.payload for item in items if item.type == "transaction") + assert event["transaction"] == "google_genai" - spans = [item.payload for item in items if item.type == "span"] - assert len(spans) == 1 - chat_span = next(item.payload for item in items if item.type == "span") + spans = [item.payload for item 
in items if item.type == "span"] + assert len(spans) == 1 + chat_span = next(item.payload for item in items if item.type == "span") - # Check chat span - assert chat_span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - assert chat_span["name"] == "chat gemini-1.5-flash" - assert chat_span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert chat_span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "gcp.gemini" - assert chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "gemini-1.5-flash" + # Check chat span + assert chat_span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert chat_span["name"] == "chat gemini-1.5-flash" + assert chat_span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert chat_span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "gcp.gemini" + assert ( + chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "gemini-1.5-flash" + ) - if send_default_pii and include_prompts: - # Response text is stored as a JSON array - response_text = chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] - # Parse the JSON array - response_texts = json.loads(response_text) - assert response_texts == ["Hello! How can I help you today?"] + if send_default_pii and include_prompts: + # Response text is stored as a JSON array + response_text = chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] + + # Parse the JSON array + response_texts = json.loads(response_text) + assert response_texts == ["Hello! 
How can I help you today?"] + else: + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_span["attributes"] + + # Check token usage + assert chat_span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + # Output tokens now include reasoning tokens: candidates_token_count (20) + thoughts_token_count (3) = 23 + assert chat_span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 23 + assert chat_span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert chat_span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 5 + assert ( + chat_span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS_REASONING] == 3 + ) else: - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_span["attributes"] - - # Check token usage - assert chat_span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - # Output tokens now include reasoning tokens: candidates_token_count (20) + thoughts_token_count (3) = 23 - assert chat_span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 23 - assert chat_span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 - assert chat_span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 5 - assert chat_span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS_REASONING] == 3 + events = capture_events() + with mock.patch.object( + mock_genai_client._api_client, + "request", + return_value=mock_http_response, + ), start_transaction(name="google_genai"): + config = create_test_config(temperature=0.7, max_output_tokens=100) + mock_genai_client.models.generate_content( + model="gemini-1.5-flash", contents="Tell me a joke", config=config + ) + assert len(events) == 1 + (event,) = events + + assert event["type"] == "transaction" + assert event["transaction"] == "google_genai" + + assert len(event["spans"]) == 1 + chat_span = event["spans"][0] + + # Check chat span + assert chat_span["op"] == OP.GEN_AI_CHAT + assert chat_span["description"] == "chat gemini-1.5-flash" + assert chat_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == 
"chat" + assert chat_span["data"][SPANDATA.GEN_AI_SYSTEM] == "gcp.gemini" + assert chat_span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "gemini-1.5-flash" + + if send_default_pii and include_prompts: + # Response text is stored as a JSON array + if stream_gen_ai_spans: + response_text = chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] + else: + response_text = chat_span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + # Parse the JSON array + response_texts = json.loads(response_text) + assert response_texts == ["Hello! How can I help you today?"] + else: + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_span["data"] + + # Check token usage + assert chat_span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + # Output tokens now include reasoning tokens: candidates_token_count (20) + thoughts_token_count (3) = 23 + assert chat_span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 23 + assert chat_span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert chat_span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 5 + assert chat_span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS_REASONING] == 3 + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize("generate_content_config", (False, True)) @pytest.mark.parametrize( "system_instructions,expected_texts", @@ -209,25 +272,29 @@ def test_nonstreaming_generate_content( ) def test_generate_content_with_system_instruction( sentry_init, + capture_events, capture_items, mock_genai_client, generate_content_config, system_instructions, expected_texts, + stream_gen_ai_spans, ): sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ): - with 
start_transaction(name="google_genai"): + if stream_gen_ai_spans: + items = capture_items("span") + + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai"): config = { "system_instruction": system_instructions, "temperature": 0.5, @@ -242,28 +309,66 @@ def test_generate_content_with_system_instruction( config=config, ) - invoke_span = next(item.payload for item in items if item.type == "span") + invoke_span = next(item.payload for item in items if item.type == "span") - if expected_texts is None: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in invoke_span["attributes"] - return + if expected_texts is None: + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in invoke_span["attributes"] + return - # (PII is enabled and include_prompts is True in this test) - system_instructions = json.loads( - invoke_span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] - ) + # (PII is enabled and include_prompts is True in this test) + system_instructions = json.loads( + invoke_span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + ) + else: + events = capture_events() + + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai"): + config = { + "system_instruction": system_instructions, + "temperature": 0.5, + } + + if generate_content_config: + config = create_test_config(**config) + + mock_genai_client.models.generate_content( + model="gemini-1.5-flash", + contents="What is 2+2?", + config=config, + ) + + (event,) = events + invoke_span = event["spans"][0] + + if expected_texts is None: + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in invoke_span["data"] + return + + # (PII is enabled and include_prompts is True in this test) + system_instructions = json.loads( + invoke_span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + ) assert system_instructions == [ {"type": "text", "content": text} for text 
in expected_texts ] -def test_generate_content_with_tools(sentry_init, capture_items, mock_genai_client): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_generate_content_with_tools( + sentry_init, + capture_events, + capture_items, + mock_genai_client, + stream_gen_ai_spans, +): sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") # Create a mock tool function def get_weather(location: str) -> str: @@ -308,27 +413,45 @@ def get_weather(location: str) -> str: mock_http_response = create_mock_http_response(tool_response_json) + if stream_gen_ai_spans: + items = capture_items("span") + else: + events = capture_events() + with mock.patch.object( mock_genai_client._api_client, "request", return_value=mock_http_response - ): - with start_transaction(name="google_genai"): - config = create_test_config(tools=[get_weather, mock_tool]) - mock_genai_client.models.generate_content( - model="gemini-1.5-flash", contents="What's the weather?", config=config - ) + ), start_transaction(name="google_genai"): + config = create_test_config(tools=[get_weather, mock_tool]) + mock_genai_client.models.generate_content( + model="gemini-1.5-flash", contents="What's the weather?", config=config + ) + + if stream_gen_ai_spans: + invoke_span = next(item.payload for item in items if item.type == "span") - invoke_span = next(item.payload for item in items if item.type == "span") + # Check that tools are recorded (data is serialized as a string) + tools_data_str = invoke_span["attributes"][ + SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS + ] + else: + (event,) = events + invoke_span = event["spans"][0] - # Check that tools are recorded (data is serialized as a string) - tools_data_str = invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS] + # Check that tools are recorded (data is serialized as a string) + tools_data_str = 
invoke_span["data"][SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS] # Parse the JSON string to verify content tools_data = json.loads(tools_data_str) assert len(tools_data) == 2 # The order of tools may not be guaranteed, so sort by name and description for comparison - sorted_tools = sorted( - tools_data, key=lambda t: (t.get("name", ""), t.get("name", "")) - ) + if stream_gen_ai_spans: + sorted_tools = sorted( + tools_data, key=lambda t: (t.get("name", ""), t.get("name", "")) + ) + else: + sorted_tools = sorted( + tools_data, key=lambda t: (t.get("name", ""), t.get("description", "")) + ) # The function tool assert sorted_tools[0]["name"] == "get_weather" @@ -339,13 +462,19 @@ def get_weather(location: str) -> str: assert sorted_tools[1]["description"] == "Get weather information (tool object)" -def test_tool_execution(sentry_init, capture_items): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_tool_execution( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") # Create a mock tool function def get_weather(location: str) -> str: @@ -357,45 +486,79 @@ def get_weather(location: str) -> str: wrapped_weather = wrapped_tool(get_weather) + if stream_gen_ai_spans: + items = capture_items("span") + else: + events = capture_events() + # Execute the wrapped tool with start_transaction(name="test_tool"): result = wrapped_weather("San Francisco") assert result == "The weather in San Francisco is sunny" - spans = [item.payload for item in items if item.type == "span"] - assert len(spans) == 1 - tool_span = next(item.payload for item in items if item.type == "span") - - assert tool_span["attributes"]["sentry.op"] == OP.GEN_AI_EXECUTE_TOOL - assert tool_span["name"] == "execute_tool get_weather" - assert 
tool_span["attributes"][SPANDATA.GEN_AI_TOOL_NAME] == "get_weather" - assert ( - tool_span["attributes"][SPANDATA.GEN_AI_TOOL_DESCRIPTION] - == "Get the weather for a location" - ) + if stream_gen_ai_spans: + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + tool_span = next(item.payload for item in items if item.type == "span") + + assert tool_span["attributes"]["sentry.op"] == OP.GEN_AI_EXECUTE_TOOL + assert tool_span["name"] == "execute_tool get_weather" + assert tool_span["attributes"][SPANDATA.GEN_AI_TOOL_NAME] == "get_weather" + assert ( + tool_span["attributes"][SPANDATA.GEN_AI_TOOL_DESCRIPTION] + == "Get the weather for a location" + ) + else: + (event,) = events + assert len(event["spans"]) == 1 + tool_span = event["spans"][0] + + assert tool_span["op"] == OP.GEN_AI_EXECUTE_TOOL + assert tool_span["description"] == "execute_tool get_weather" + assert tool_span["data"][SPANDATA.GEN_AI_TOOL_NAME] == "get_weather" + assert ( + tool_span["data"][SPANDATA.GEN_AI_TOOL_DESCRIPTION] + == "Get the weather for a location" + ) -def test_error_handling(sentry_init, capture_items, mock_genai_client): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_error_handling( + sentry_init, + capture_events, + capture_items, + mock_genai_client, + stream_gen_ai_spans, +): sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("event", "transaction") + if stream_gen_ai_spans: + items = capture_items("event", "transaction") + else: + events = capture_events() # Mock an error at the HTTP level with mock.patch.object( mock_genai_client._api_client, "request", side_effect=Exception("API Error") + ), start_transaction(name="google_genai"), pytest.raises( + Exception, match="API Error" ): - with start_transaction(name="google_genai"): - with pytest.raises(Exception, match="API Error"): - 
mock_genai_client.models.generate_content( - model="gemini-1.5-flash", - contents="This will fail", - config=create_test_config(), - ) + mock_genai_client.models.generate_content( + model="gemini-1.5-flash", + contents="This will fail", + config=create_test_config(), + ) - (error_event,) = (item.payload for item in items if item.type == "event") + if stream_gen_ai_spans: + (error_event,) = (item.payload for item in items if item.type == "event") + else: + # Should have both transaction and error events + assert len(events) == 2 + error_event, transaction_event = events assert error_event["level"] == "error" assert error_event["exception"]["values"][0]["type"] == "Exception" @@ -403,14 +566,21 @@ def test_error_handling(sentry_init, capture_items, mock_genai_client): assert error_event["exception"]["values"][0]["mechanism"]["type"] == "google_genai" -def test_streaming_generate_content(sentry_init, capture_items, mock_genai_client): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_streaming_generate_content( + sentry_init, + capture_events, + capture_items, + mock_genai_client, + stream_gen_ai_spans, +): """Test streaming with generate_content_stream, verifying chunk accumulation.""" sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") # Create streaming chunks - simulating a multi-chunk response # Chunk 1: First part of text with partial usage metadata @@ -474,17 +644,21 @@ def test_streaming_generate_content(sentry_init, capture_items, mock_genai_clien stream_chunks = [chunk1_json, chunk2_json, chunk3_json] mock_stream = create_mock_streaming_responses(stream_chunks) + if stream_gen_ai_spans: + items = capture_items("span") + else: + events = capture_events() + with mock.patch.object( mock_genai_client._api_client, "request_streamed", return_value=mock_stream - ): - with 
start_transaction(name="google_genai"): - config = create_test_config() - stream = mock_genai_client.models.generate_content_stream( - model="gemini-1.5-flash", contents="Stream me a response", config=config - ) + ), start_transaction(name="google_genai"): + config = create_test_config() + stream = mock_genai_client.models.generate_content_stream( + model="gemini-1.5-flash", contents="Stream me a response", config=config + ) - # Consume the stream (this is what users do with the integration wrapper) - collected_chunks = list(stream) + # Consume the stream (this is what users do with the integration wrapper) + collected_chunks = list(stream) # Verify we got all chunks assert len(collected_chunks) == 3 @@ -492,68 +666,125 @@ def test_streaming_generate_content(sentry_init, capture_items, mock_genai_clien assert collected_chunks[1].candidates[0].content.parts[0].text == "How can I " assert collected_chunks[2].candidates[0].content.parts[0].text == "help you today?" - spans = [item.payload for item in items if item.type == "span"] - assert len(spans) == 1 - chat_span = next(item.payload for item in items if item.type == "span") + if stream_gen_ai_spans: + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + chat_span = next(item.payload for item in items if item.type == "span") - # Check that streaming flag is set on both spans - assert chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + # Check that streaming flag is set on both spans + assert chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + else: + (event,) = events + + assert len(event["spans"]) == 1 + chat_span = event["spans"][0] + + # Check that streaming flag is set on both spans + assert chat_span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True # Verify accumulated response text (all chunks combined) expected_full_text = "Hello! How can I help you today?" 
# Response text is stored as a JSON string - chat_response_text = json.loads( - chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] - ) + if stream_gen_ai_spans: + chat_response_text = json.loads( + chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] + ) + else: + chat_response_text = json.loads( + chat_span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + ) assert chat_response_text == [expected_full_text] # Verify finish reasons (only the final chunk has a finish reason) # When there's a single finish reason, it's stored as a plain string (not JSON) - assert SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS in chat_span["attributes"] - assert chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == "STOP" - assert chat_span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert chat_span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 - assert chat_span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 25 - assert chat_span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 5 - assert chat_span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS_REASONING] == 3 + if stream_gen_ai_spans: + assert SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS in chat_span["attributes"] + assert ( + chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == "STOP" + ) + assert chat_span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert chat_span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 + assert chat_span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 25 + assert chat_span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 5 + assert ( + chat_span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS_REASONING] == 3 + ) + + # Verify model name + assert ( + chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "gemini-1.5-flash" + ) + else: + assert SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS in chat_span["data"] + assert chat_span["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == "STOP" + assert 
chat_span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert chat_span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 + assert chat_span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 25 + assert chat_span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 5 + assert chat_span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS_REASONING] == 3 - # Verify model name - assert chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "gemini-1.5-flash" + # Verify model name + assert chat_span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "gemini-1.5-flash" -def test_span_origin(sentry_init, capture_items, mock_genai_client): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_span_origin( + sentry_init, + capture_events, + capture_items, + mock_genai_client, + stream_gen_ai_spans, +): sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span", "transaction") mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) + if stream_gen_ai_spans: + items = capture_items("span", "transaction") + else: + events = capture_events() + with mock.patch.object( mock_genai_client._api_client, "request", return_value=mock_http_response - ): - with start_transaction(name="google_genai"): - config = create_test_config() - mock_genai_client.models.generate_content( - model="gemini-1.5-flash", contents="Test origin", config=config - ) + ), start_transaction(name="google_genai"): + config = create_test_config() + mock_genai_client.models.generate_content( + model="gemini-1.5-flash", contents="Test origin", config=config + ) + + if stream_gen_ai_spans: + (event,) = (item.payload for item in items if item.type == "transaction") + assert event["contexts"]["trace"]["origin"] == "manual" - (event,) = (item.payload for item in items if item.type == "transaction") - assert event["contexts"]["trace"]["origin"] == "manual" + spans = [item.payload 
for item in items if item.type == "span"] + for span in spans: + assert span["attributes"]["sentry.origin"] == "auto.ai.google_genai" + else: + (event,) = events - spans = [item.payload for item in items if item.type == "span"] - for span in spans: - assert span["attributes"]["sentry.origin"] == "auto.ai.google_genai" + assert event["contexts"]["trace"]["origin"] == "manual" + for span in event["spans"]: + assert span["origin"] == "auto.ai.google_genai" -def test_response_without_usage_metadata(sentry_init, capture_items, mock_genai_client): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_response_without_usage_metadata( + sentry_init, + capture_events, + capture_items, + mock_genai_client, + stream_gen_ai_spans, +): """Test handling of responses without usage metadata""" sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") # Response without usage metadata response_json = { @@ -570,31 +801,58 @@ def test_response_without_usage_metadata(sentry_init, capture_items, mock_genai_ mock_http_response = create_mock_http_response(response_json) - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ): - with start_transaction(name="google_genai"): + if stream_gen_ai_spans: + items = capture_items("span") + + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai"): + config = create_test_config() + mock_genai_client.models.generate_content( + model="gemini-1.5-flash", contents="Test", config=config + ) + + chat_span = next(item.payload for item in items if item.type == "span") + + # Usage data should not be present + assert SPANDATA.GEN_AI_USAGE_INPUT_TOKENS not in chat_span["attributes"] + assert SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS not in chat_span["attributes"] + assert 
SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS not in chat_span["attributes"] + else: + events = capture_events() + + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai"): config = create_test_config() mock_genai_client.models.generate_content( model="gemini-1.5-flash", contents="Test", config=config ) - chat_span = next(item.payload for item in items if item.type == "span") + (event,) = events + chat_span = event["spans"][0] - # Usage data should not be present - assert SPANDATA.GEN_AI_USAGE_INPUT_TOKENS not in chat_span["attributes"] - assert SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS not in chat_span["attributes"] - assert SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS not in chat_span["attributes"] + # Usage data should not be present + assert SPANDATA.GEN_AI_USAGE_INPUT_TOKENS not in chat_span["data"] + assert SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS not in chat_span["data"] + assert SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS not in chat_span["data"] -def test_multiple_candidates(sentry_init, capture_items, mock_genai_client): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_multiple_candidates( + sentry_init, + capture_events, + capture_items, + mock_genai_client, + stream_gen_ai_spans, +): """Test handling of multiple response candidates""" sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") # Response with multiple candidates multi_candidate_json = { @@ -623,20 +881,38 @@ def test_multiple_candidates(sentry_init, capture_items, mock_genai_client): mock_http_response = create_mock_http_response(multi_candidate_json) - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ): - with start_transaction(name="google_genai"): + if stream_gen_ai_spans: + items = capture_items("span") + 
+ with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai"): + config = create_test_config() + mock_genai_client.models.generate_content( + model="gemini-1.5-flash", contents="Generate multiple", config=config + ) + + chat_span = next(item.payload for item in items if item.type == "span") + else: + events = capture_events() + + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai"): config = create_test_config() mock_genai_client.models.generate_content( model="gemini-1.5-flash", contents="Generate multiple", config=config ) - chat_span = next(item.payload for item in items if item.type == "span") + (event,) = events + chat_span = event["spans"][0] # Should capture all responses # Response text is stored as a JSON string when there are multiple responses - response_text = chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] + if stream_gen_ai_spans: + response_text = chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] + else: + response_text = chat_span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] if isinstance(response_text, str) and response_text.startswith("["): # It's a JSON array response_list = json.loads(response_text) @@ -646,26 +922,73 @@ def test_multiple_candidates(sentry_init, capture_items, mock_genai_client): assert response_text == "Response 1\nResponse 2" # Finish reasons are serialized as JSON - finish_reasons = json.loads( - chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] - ) + if stream_gen_ai_spans: + finish_reasons = json.loads( + chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] + ) + else: + finish_reasons = json.loads( + chat_span["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] + ) assert finish_reasons == ["STOP", "MAX_TOKENS"] -def test_all_configuration_parameters(sentry_init, capture_items, mock_genai_client): 
+@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_all_configuration_parameters( + sentry_init, + capture_events, + capture_items, + mock_genai_client, + stream_gen_ai_spans, +): """Test that all configuration parameters are properly recorded""" sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ): - with start_transaction(name="google_genai"): + if stream_gen_ai_spans: + items = capture_items("span") + + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai"): + config = create_test_config( + temperature=0.8, + top_p=0.95, + top_k=40, + max_output_tokens=2048, + presence_penalty=0.1, + frequency_penalty=0.2, + seed=12345, + ) + mock_genai_client.models.generate_content( + model="gemini-1.5-flash", contents="Test all params", config=config + ) + + invoke_span = next(item.payload for item in items if item.type == "span") + + # Check all parameters are recorded + assert invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.8 + assert invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.95 + assert invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_K] == 40 + assert invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 2048 + assert ( + invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 + ) + assert ( + invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 + ) + assert invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_SEED] == 12345 + else: + events = capture_events() + + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), 
start_transaction(name="google_genai"): config = create_test_config( temperature=0.8, top_p=0.95, @@ -679,56 +1002,88 @@ def test_all_configuration_parameters(sentry_init, capture_items, mock_genai_cli model="gemini-1.5-flash", contents="Test all params", config=config ) - invoke_span = next(item.payload for item in items if item.type == "span") + (event,) = events + invoke_span = event["spans"][0] - # Check all parameters are recorded - assert invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.8 - assert invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.95 - assert invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_K] == 40 - assert invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 2048 - assert invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 - assert invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 - assert invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_SEED] == 12345 + # Check all parameters are recorded + assert invoke_span["data"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.8 + assert invoke_span["data"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.95 + assert invoke_span["data"][SPANDATA.GEN_AI_REQUEST_TOP_K] == 40 + assert invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 2048 + assert invoke_span["data"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 + assert invoke_span["data"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 + assert invoke_span["data"][SPANDATA.GEN_AI_REQUEST_SEED] == 12345 -def test_empty_response(sentry_init, capture_items, mock_genai_client): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_empty_response( + sentry_init, + capture_events, + capture_items, + mock_genai_client, + stream_gen_ai_spans, +): """Test handling of minimal response with no content""" sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = 
capture_items("span") # Minimal response with empty candidates array minimal_response_json = {"candidates": []} mock_http_response = create_mock_http_response(minimal_response_json) - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ): - with start_transaction(name="google_genai"): + if stream_gen_ai_spans: + items = capture_items("span") + + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai"): + response = mock_genai_client.models.generate_content( + model="gemini-1.5-flash", contents="Test", config=create_test_config() + ) + + # Response will have an empty candidates list + assert response is not None + assert len(response.candidates) == 0 + + # Should still create spans even with empty candidates + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + else: + events = capture_events() + + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai"): response = mock_genai_client.models.generate_content( model="gemini-1.5-flash", contents="Test", config=create_test_config() ) - # Response will have an empty candidates list - assert response is not None - assert len(response.candidates) == 0 + # Response will have an empty candidates list + assert response is not None + assert len(response.candidates) == 0 - # Should still create spans even with empty candidates - spans = [item.payload for item in items if item.type == "span"] - assert len(spans) == 1 + (event,) = events + # Should still create spans even with empty candidates + assert len(event["spans"]) == 1 +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_response_with_different_id_fields( - sentry_init, capture_items, mock_genai_client + sentry_init, + capture_events, + capture_items, + mock_genai_client, + stream_gen_ai_spans, 
): """Test handling of different response ID field names""" sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") # Response with response_id and model_version response_json = { @@ -747,21 +1102,40 @@ def test_response_with_different_id_fields( mock_http_response = create_mock_http_response(response_json) - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ): - with start_transaction(name="google_genai"): + if stream_gen_ai_spans: + items = capture_items("span") + + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai"): mock_genai_client.models.generate_content( model="gemini-1.5-flash", contents="Test", config=create_test_config() ) - chat_span = next(item.payload for item in items if item.type == "span") + chat_span = next(item.payload for item in items if item.type == "span") - assert chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] == "resp-456" - assert ( - chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_MODEL] - == "gemini-1.5-flash-001" - ) + assert chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] == "resp-456" + assert ( + chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_MODEL] + == "gemini-1.5-flash-001" + ) + else: + events = capture_events() + + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai"): + mock_genai_client.models.generate_content( + model="gemini-1.5-flash", contents="Test", config=create_test_config() + ) + + (event,) = events + chat_span = event["spans"][0] + + assert chat_span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "resp-456" + assert ( + chat_span["data"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "gemini-1.5-flash-001" + ) def test_tool_with_async_function(sentry_init): @@ -785,40 +1159,72 
@@ async def async_tool(param: str) -> str: assert hasattr(wrapped_async_tool, "__wrapped__") # Should preserve original -def test_contents_as_none(sentry_init, capture_items, mock_genai_client): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_contents_as_none( + sentry_init, + capture_events, + capture_items, + mock_genai_client, + stream_gen_ai_spans, +): """Test handling when contents parameter is None""" sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ): - with start_transaction(name="google_genai"): + if stream_gen_ai_spans: + items = capture_items("span") + + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai"): + mock_genai_client.models.generate_content( + model="gemini-1.5-flash", contents=None, config=create_test_config() + ) + + invoke_span = next(item.payload for item in items if item.type == "span") + + # Should handle None contents gracefully + messages = invoke_span["attributes"].get(SPANDATA.GEN_AI_REQUEST_MESSAGES, []) + else: + events = capture_events() + + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai"): mock_genai_client.models.generate_content( model="gemini-1.5-flash", contents=None, config=create_test_config() ) - invoke_span = next(item.payload for item in items if item.type == "span") + (event,) = events + invoke_span = event["spans"][0] + + # Should handle None contents gracefully + messages = invoke_span["data"].get(SPANDATA.GEN_AI_REQUEST_MESSAGES, []) - # Should handle None contents 
gracefully - messages = invoke_span["attributes"].get(SPANDATA.GEN_AI_REQUEST_MESSAGES, []) # Should only have system message if any, not user message assert all(msg["role"] != "user" or msg["content"] is not None for msg in messages) -def test_tool_calls_extraction(sentry_init, capture_items, mock_genai_client): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_tool_calls_extraction( + sentry_init, + capture_events, + capture_items, + mock_genai_client, + stream_gen_ai_spans, +): """Test extraction of tool/function calls from response""" sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") # Response with function calls function_call_response_json = { @@ -857,27 +1263,49 @@ def test_tool_calls_extraction(sentry_init, capture_items, mock_genai_client): mock_http_response = create_mock_http_response(function_call_response_json) - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ): - with start_transaction(name="google_genai"): + if stream_gen_ai_spans: + items = capture_items("span") + + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai"): mock_genai_client.models.generate_content( model="gemini-1.5-flash", contents="What's the weather and time?", config=create_test_config(), ) - chat_span = next( - item.payload for item in items if item.type == "span" - ) # The chat span + chat_span = next( + item.payload for item in items if item.type == "span" + ) # The chat span - # Check that tool calls are extracted and stored - assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in chat_span["attributes"] + # Check that tool calls are extracted and stored + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in chat_span["attributes"] - # Parse the JSON string to verify content - tool_calls = json.loads( - 
chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS] - ) + # Parse the JSON string to verify content + tool_calls = json.loads( + chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS] + ) + else: + events = capture_events() + + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai"): + mock_genai_client.models.generate_content( + model="gemini-1.5-flash", + contents="What's the weather and time?", + config=create_test_config(), + ) + + (event,) = events + chat_span = event["spans"][0] # The chat span + + # Check that tool calls are extracted and stored + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in chat_span["data"] + + # Parse the JSON string to verify content + tool_calls = json.loads(chat_span["data"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS]) assert len(tool_calls) == 2 @@ -897,14 +1325,21 @@ def test_tool_calls_extraction(sentry_init, capture_items, mock_genai_client): assert json.loads(tool_calls[1]["arguments"]) == {"timezone": "PST"} -def test_google_genai_message_truncation(sentry_init, capture_items, mock_genai_client): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_google_genai_message_truncation( + sentry_init, + capture_events, + capture_items, + mock_genai_client, + stream_gen_ai_spans, +): """Test that large messages are truncated properly in Google GenAI integration.""" sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") large_content = ( "This is a very long message that will exceed our size limits. 
" * 1000 @@ -913,26 +1348,45 @@ def test_google_genai_message_truncation(sentry_init, capture_items, mock_genai_ mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ): - with start_transaction(name="google_genai"): + if stream_gen_ai_spans: + items = capture_items("span") + + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai"): mock_genai_client.models.generate_content( model="gemini-1.5-flash", contents=[large_content, small_content], config=create_test_config(), ) - invoke_span = next(item.payload for item in items if item.type == "span") - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["attributes"] - - messages_data = invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert isinstance(messages_data, str) + invoke_span = next(item.payload for item in items if item.type == "span") + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["attributes"] - parsed_messages = json.loads(messages_data) - assert isinstance(parsed_messages, list) - assert len(parsed_messages) == 1 - assert parsed_messages[0]["role"] == "user" + messages_data = invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + else: + events = capture_events() + + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai"): + mock_genai_client.models.generate_content( + model="gemini-1.5-flash", + contents=[large_content, small_content], + config=create_test_config(), + ) + + (event,) = events + invoke_span = event["spans"][0] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["data"] + + messages_data = invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert isinstance(messages_data, str) + + parsed_messages = json.loads(messages_data) + assert 
isinstance(parsed_messages, list) + assert len(parsed_messages) == 1 + assert parsed_messages[0]["role"] == "user" # What "small content" becomes because the large message used the entire character limit assert "..." in parsed_messages[0]["content"][1]["text"] @@ -962,6 +1416,7 @@ def test_google_genai_message_truncation(sentry_init, capture_items, mock_genai_ } +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( "send_default_pii, include_prompts", [ @@ -972,24 +1427,32 @@ def test_google_genai_message_truncation(sentry_init, capture_items, mock_genai_ ], ) def test_embed_content( - sentry_init, capture_items, send_default_pii, include_prompts, mock_genai_client + sentry_init, + capture_events, + capture_items, + send_default_pii, + include_prompts, + mock_genai_client, + stream_gen_ai_spans, ): sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") # Mock the HTTP response at the _api_client.request() level mock_http_response = create_mock_http_response(EXAMPLE_EMBED_RESPONSE_JSON) - with mock.patch.object( - mock_genai_client._api_client, - "request", - return_value=mock_http_response, - ): - with start_transaction(name="google_genai_embeddings"): + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with mock.patch.object( + mock_genai_client._api_client, + "request", + return_value=mock_http_response, + ), start_transaction(name="google_genai_embeddings"): mock_genai_client.models.embed_content( model="text-embedding-004", contents=[ @@ -998,49 +1461,136 @@ def test_embed_content( ], ) - (event,) = (item.payload for item in items if item.type == "transaction") - assert event["transaction"] == "google_genai_embeddings" + (event,) = (item.payload for item in items if item.type == "transaction") - # Should 
have 1 span for embeddings - spans = [item.payload for item in items if item.type == "span"] - assert len(spans) == 1 - (embed_span,) = spans + assert event["transaction"] == "google_genai_embeddings" - # Check embeddings span - assert embed_span["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS - assert embed_span["name"] == "embeddings text-embedding-004" - assert embed_span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings" - assert embed_span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "gcp.gemini" - assert ( - embed_span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "text-embedding-004" - ) + # Should have 1 span for embeddings + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (embed_span,) = spans - # Check input texts if PII is allowed - if send_default_pii and include_prompts: - input_texts = json.loads( - embed_span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + # Check embeddings span + assert embed_span["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS + assert embed_span["name"] == "embeddings text-embedding-004" + assert embed_span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings" + assert embed_span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "gcp.gemini" + assert ( + embed_span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] + == "text-embedding-004" ) - assert input_texts == [ - "What is your name?", - "What is your favorite color?", - ] + + # Check input texts if PII is allowed + if send_default_pii and include_prompts: + if stream_gen_ai_spans: + input_texts = json.loads( + embed_span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + ) + else: + input_texts = json.loads( + embed_span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + ) + assert input_texts == [ + "What is your name?", + "What is your favorite color?", + ] + else: + if stream_gen_ai_spans: + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in embed_span["attributes"] + else: + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in 
embed_span["data"] + + # Check usage data (sum of token counts from statistics: 10 + 15 = 25) + # Note: Only available in newer versions with ContentEmbeddingStatistics + if stream_gen_ai_spans: + if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["attributes"]: + assert ( + embed_span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 25 + ) + else: + if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["data"]: + assert embed_span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 25 else: - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in embed_span["attributes"] + events = capture_events() - # Check usage data (sum of token counts from statistics: 10 + 15 = 25) - # Note: Only available in newer versions with ContentEmbeddingStatistics - if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["attributes"]: - assert embed_span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 25 + with mock.patch.object( + mock_genai_client._api_client, + "request", + return_value=mock_http_response, + ), start_transaction(name="google_genai_embeddings"): + mock_genai_client.models.embed_content( + model="text-embedding-004", + contents=[ + "What is your name?", + "What is your favorite color?", + ], + ) + assert len(events) == 1 + (event,) = events -def test_embed_content_string_input(sentry_init, capture_items, mock_genai_client): + assert event["type"] == "transaction" + + assert event["transaction"] == "google_genai_embeddings" + + # Should have 1 span for embeddings + assert len(event["spans"]) == 1 + (embed_span,) = event["spans"] + + # Check embeddings span + assert embed_span["op"] == OP.GEN_AI_EMBEDDINGS + assert embed_span["description"] == "embeddings text-embedding-004" + assert embed_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings" + assert embed_span["data"][SPANDATA.GEN_AI_SYSTEM] == "gcp.gemini" + assert embed_span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "text-embedding-004" + + # Check input texts if PII is allowed + if send_default_pii and 
include_prompts: + if stream_gen_ai_spans: + input_texts = json.loads( + embed_span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + ) + else: + input_texts = json.loads( + embed_span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + ) + assert input_texts == [ + "What is your name?", + "What is your favorite color?", + ] + else: + if stream_gen_ai_spans: + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in embed_span["attributes"] + else: + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in embed_span["data"] + + # Check usage data (sum of token counts from statistics: 10 + 15 = 25) + # Note: Only available in newer versions with ContentEmbeddingStatistics + if stream_gen_ai_spans: + if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["attributes"]: + assert ( + embed_span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 25 + ) + else: + if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["data"]: + assert embed_span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 25 + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_embed_content_string_input( + sentry_init, + capture_events, + capture_items, + mock_genai_client, + stream_gen_ai_spans, +): """Test embed_content with a single string instead of list.""" sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") # Mock response with single embedding single_embed_response = { @@ -1059,49 +1609,112 @@ def test_embed_content_string_input(sentry_init, capture_items, mock_genai_clien } mock_http_response = create_mock_http_response(single_embed_response) - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ): - with start_transaction(name="google_genai_embeddings"): + if stream_gen_ai_spans: + items = capture_items("span") + + with mock.patch.object( + mock_genai_client._api_client, "request", 
return_value=mock_http_response + ), start_transaction(name="google_genai_embeddings"): mock_genai_client.models.embed_content( model="text-embedding-004", contents="Single text input", ) - spans = [item.payload for item in items if item.type == "span"] - (embed_span,) = spans + spans = [item.payload for item in items if item.type == "span"] + (embed_span,) = spans - # Check that single string is handled correctly - input_texts = json.loads(embed_span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) - assert input_texts == ["Single text input"] - # Should use token_count from statistics (5), not billable_character_count (10) - # Note: Only available in newer versions with ContentEmbeddingStatistics - if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["attributes"]: - assert embed_span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 5 + # Check that single string is handled correctly + input_texts = json.loads( + embed_span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + ) + + assert input_texts == ["Single text input"] + # Should use token_count from statistics (5), not billable_character_count (10) + # Note: Only available in newer versions with ContentEmbeddingStatistics + if stream_gen_ai_spans: + if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["attributes"]: + assert embed_span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 5 + else: + if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["data"]: + assert embed_span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 5 + else: + events = capture_events() + + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai_embeddings"): + mock_genai_client.models.embed_content( + model="text-embedding-004", + contents="Single text input", + ) + (event,) = events + (embed_span,) = event["spans"] -def test_embed_content_error_handling(sentry_init, capture_items, mock_genai_client): + # Check that single string is handled 
correctly + input_texts = json.loads(embed_span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) + + assert input_texts == ["Single text input"] + # Should use token_count from statistics (5), not billable_character_count (10) + # Note: Only available in newer versions with ContentEmbeddingStatistics + if stream_gen_ai_spans: + if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["attributes"]: + assert embed_span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 5 + else: + if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["data"]: + assert embed_span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 5 + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_embed_content_error_handling( + sentry_init, + capture_events, + capture_items, + mock_genai_client, + stream_gen_ai_spans, +): """Test error handling in embed_content.""" sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "event") + if stream_gen_ai_spans: + items = capture_items("transaction", "event") + + # Mock an error at the HTTP level + with mock.patch.object( + mock_genai_client._api_client, + "request", + side_effect=Exception("Embedding API Error"), + ), start_transaction(name="google_genai_embeddings"), pytest.raises( + Exception, match="Embedding API Error" + ): + mock_genai_client.models.embed_content( + model="text-embedding-004", + contents=["This will fail"], + ) - # Mock an error at the HTTP level - with mock.patch.object( - mock_genai_client._api_client, - "request", - side_effect=Exception("Embedding API Error"), - ): - with start_transaction(name="google_genai_embeddings"): - with pytest.raises(Exception, match="Embedding API Error"): - mock_genai_client.models.embed_content( - model="text-embedding-004", - contents=["This will fail"], - ) + (error_event,) = (item.payload for item in items if item.type == "event") + else: + events = 
capture_events() + + # Mock an error at the HTTP level + with mock.patch.object( + mock_genai_client._api_client, + "request", + side_effect=Exception("Embedding API Error"), + ), start_transaction(name="google_genai_embeddings"), pytest.raises( + Exception, match="Embedding API Error" + ): + mock_genai_client.models.embed_content( + model="text-embedding-004", + contents=["This will fail"], + ) - (error_event,) = (item.payload for item in items if item.type == "event") + # Should have both transaction and error events + assert len(events) == 2 + error_event, _ = events assert error_event["level"] == "error" assert error_event["exception"]["values"][0]["type"] == "Exception" @@ -1109,15 +1722,20 @@ def test_embed_content_error_handling(sentry_init, capture_items, mock_genai_cli assert error_event["exception"]["values"][0]["mechanism"]["type"] == "google_genai" +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_embed_content_without_statistics( - sentry_init, capture_items, mock_genai_client + sentry_init, + capture_events, + capture_items, + mock_genai_client, + stream_gen_ai_spans, ): """Test embed_content response without statistics (older package versions).""" sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") # Response without statistics (typical for older google-genai versions) # Embeddings exist but don't have the statistics field @@ -1133,50 +1751,92 @@ def test_embed_content_without_statistics( } mock_http_response = create_mock_http_response(old_version_response) - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ): - with start_transaction(name="google_genai_embeddings"): + if stream_gen_ai_spans: + items = capture_items("span") + + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), 
start_transaction(name="google_genai_embeddings"): mock_genai_client.models.embed_content( model="text-embedding-004", contents=["Test without statistics", "Another test"], ) - spans = [item.payload for item in items if item.type == "span"] - (embed_span,) = spans + spans = [item.payload for item in items if item.type == "span"] + (embed_span,) = spans - # No usage tokens since there are no statistics in older versions - # This is expected and the integration should handle it gracefully - assert SPANDATA.GEN_AI_USAGE_INPUT_TOKENS not in embed_span["attributes"] + # No usage tokens since there are no statistics in older versions + # This is expected and the integration should handle it gracefully + assert SPANDATA.GEN_AI_USAGE_INPUT_TOKENS not in embed_span["attributes"] + else: + events = capture_events() + + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai_embeddings"): + mock_genai_client.models.embed_content( + model="text-embedding-004", + contents=["Test without statistics", "Another test"], + ) + + (event,) = events + (embed_span,) = event["spans"] + + # No usage tokens since there are no statistics in older versions + # This is expected and the integration should handle it gracefully + assert SPANDATA.GEN_AI_USAGE_INPUT_TOKENS not in embed_span["data"] -def test_embed_content_span_origin(sentry_init, capture_items, mock_genai_client): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_embed_content_span_origin( + sentry_init, + capture_events, + capture_items, + mock_genai_client, + stream_gen_ai_spans, +): """Test that embed_content spans have correct origin.""" sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") mock_http_response = create_mock_http_response(EXAMPLE_EMBED_RESPONSE_JSON) + if 
stream_gen_ai_spans: + items = capture_items("transaction", "span") + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai_embeddings"): + mock_genai_client.models.embed_content( + model="text-embedding-004", + contents=["Test origin"], + ) - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ): - with start_transaction(name="google_genai_embeddings"): + (event,) = (item.payload for item in items if item.type == "transaction") + assert event["contexts"]["trace"]["origin"] == "manual" + + spans = [item.payload for item in items if item.type == "span"] + for span in spans: + assert span["attributes"]["sentry.origin"] == "auto.ai.google_genai" + else: + events = capture_events() + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai_embeddings"): mock_genai_client.models.embed_content( model="text-embedding-004", contents=["Test origin"], ) - (event,) = (item.payload for item in items if item.type == "transaction") - assert event["contexts"]["trace"]["origin"] == "manual" + (event,) = events - spans = [item.payload for item in items if item.type == "span"] - for span in spans: - assert span["attributes"]["sentry.origin"] == "auto.ai.google_genai" + assert event["contexts"]["trace"]["origin"] == "manual" + for span in event["spans"]: + assert span["origin"] == "auto.ai.google_genai" +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio @pytest.mark.parametrize( "send_default_pii, include_prompts", @@ -1188,25 +1848,33 @@ def test_embed_content_span_origin(sentry_init, capture_items, mock_genai_client ], ) async def test_async_embed_content( - sentry_init, capture_items, send_default_pii, include_prompts, mock_genai_client + sentry_init, + capture_events, + capture_items, + send_default_pii, + include_prompts, + 
mock_genai_client, + stream_gen_ai_spans, ): """Test async embed_content method.""" sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") # Mock the async HTTP response mock_http_response = create_mock_http_response(EXAMPLE_EMBED_RESPONSE_JSON) - with mock.patch.object( - mock_genai_client._api_client, - "async_request", - return_value=mock_http_response, - ): - with start_transaction(name="google_genai_embeddings_async"): + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with mock.patch.object( + mock_genai_client._api_client, + "async_request", + return_value=mock_http_response, + ), start_transaction(name="google_genai_embeddings_async"): await mock_genai_client.aio.models.embed_content( model="text-embedding-004", contents=[ @@ -1215,52 +1883,109 @@ async def test_async_embed_content( ], ) - (event,) = (item.payload for item in items if item.type == "transaction") - assert event["transaction"] == "google_genai_embeddings_async" + (event,) = (item.payload for item in items if item.type == "transaction") - # Should have 1 span for embeddings - spans = [item.payload for item in items if item.type == "span"] - assert len(spans) == 1 - (embed_span,) = spans + assert event["transaction"] == "google_genai_embeddings_async" - # Check embeddings span - assert embed_span["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS - assert embed_span["name"] == "embeddings text-embedding-004" - assert embed_span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings" - assert embed_span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "gcp.gemini" - assert ( - embed_span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "text-embedding-004" - ) + # Should have 1 span for embeddings + spans = [item.payload for item in items if item.type == "span"] + assert 
len(spans) == 1 + (embed_span,) = spans - # Check input texts if PII is allowed - if send_default_pii and include_prompts: - input_texts = json.loads( - embed_span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + # Check embeddings span + assert embed_span["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS + assert embed_span["name"] == "embeddings text-embedding-004" + assert embed_span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings" + assert embed_span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "gcp.gemini" + assert ( + embed_span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] + == "text-embedding-004" ) - assert input_texts == [ - "What is your name?", - "What is your favorite color?", - ] + + # Check input texts if PII is allowed + if send_default_pii and include_prompts: + input_texts = json.loads( + embed_span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + ) + assert input_texts == [ + "What is your name?", + "What is your favorite color?", + ] + else: + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in embed_span["attributes"] + + # Check usage data (sum of token counts from statistics: 10 + 15 = 25) + # Note: Only available in newer versions with ContentEmbeddingStatistics + if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["attributes"]: + assert embed_span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 25 else: - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in embed_span["attributes"] + events = capture_events() - # Check usage data (sum of token counts from statistics: 10 + 15 = 25) - # Note: Only available in newer versions with ContentEmbeddingStatistics - if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["attributes"]: - assert embed_span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 25 + with mock.patch.object( + mock_genai_client._api_client, + "async_request", + return_value=mock_http_response, + ), start_transaction(name="google_genai_embeddings_async"): + await mock_genai_client.aio.models.embed_content( + 
model="text-embedding-004", + contents=[ + "What is your name?", + "What is your favorite color?", + ], + ) + + assert len(events) == 1 + (event,) = events + + assert event["type"] == "transaction" + + assert event["transaction"] == "google_genai_embeddings_async" + # Should have 1 span for embeddings + assert len(event["spans"]) == 1 + (embed_span,) = event["spans"] + # Check embeddings span + assert embed_span["op"] == OP.GEN_AI_EMBEDDINGS + assert embed_span["description"] == "embeddings text-embedding-004" + assert embed_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings" + assert embed_span["data"][SPANDATA.GEN_AI_SYSTEM] == "gcp.gemini" + assert embed_span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "text-embedding-004" + + # Check input texts if PII is allowed + if send_default_pii and include_prompts: + input_texts = json.loads( + embed_span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + ) + assert input_texts == [ + "What is your name?", + "What is your favorite color?", + ] + else: + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in embed_span["data"] + + # Check usage data (sum of token counts from statistics: 10 + 15 = 25) + # Note: Only available in newer versions with ContentEmbeddingStatistics + if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["data"]: + assert embed_span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 25 + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_async_embed_content_string_input( - sentry_init, capture_items, mock_genai_client + sentry_init, + capture_events, + capture_items, + mock_genai_client, + stream_gen_ai_spans, ): """Test async embed_content with a single string instead of list.""" sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") # Mock response with single embedding single_embed_response = { @@ 
-1279,52 +2004,108 @@ async def test_async_embed_content_string_input( } mock_http_response = create_mock_http_response(single_embed_response) - with mock.patch.object( - mock_genai_client._api_client, "async_request", return_value=mock_http_response - ): - with start_transaction(name="google_genai_embeddings_async"): + if stream_gen_ai_spans: + items = capture_items("span") + + with mock.patch.object( + mock_genai_client._api_client, + "async_request", + return_value=mock_http_response, + ), start_transaction(name="google_genai_embeddings_async"): + await mock_genai_client.aio.models.embed_content( + model="text-embedding-004", + contents="Single text input", + ) + + spans = [item.payload for item in items if item.type == "span"] + (embed_span,) = spans + + # Check that single string is handled correctly + input_texts = json.loads( + embed_span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + ) + else: + events = capture_events() + + with mock.patch.object( + mock_genai_client._api_client, + "async_request", + return_value=mock_http_response, + ), start_transaction(name="google_genai_embeddings_async"): await mock_genai_client.aio.models.embed_content( model="text-embedding-004", contents="Single text input", ) - spans = [item.payload for item in items if item.type == "span"] - (embed_span,) = spans + (event,) = events + (embed_span,) = event["spans"] + + # Check that single string is handled correctly + input_texts = json.loads(embed_span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) - # Check that single string is handled correctly - input_texts = json.loads(embed_span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) assert input_texts == ["Single text input"] # Should use token_count from statistics (5), not billable_character_count (10) # Note: Only available in newer versions with ContentEmbeddingStatistics - if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["attributes"]: - assert embed_span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 5 + if 
stream_gen_ai_spans: + if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["attributes"]: + assert embed_span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 5 + else: + if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["data"]: + assert embed_span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 5 +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_async_embed_content_error_handling( - sentry_init, capture_items, mock_genai_client + sentry_init, + capture_events, + capture_items, + mock_genai_client, + stream_gen_ai_spans, ): """Test error handling in async embed_content.""" sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "event") - # Mock an error at the HTTP level - with mock.patch.object( - mock_genai_client._api_client, - "async_request", - side_effect=Exception("Async Embedding API Error"), - ): - with start_transaction(name="google_genai_embeddings_async"): - with pytest.raises(Exception, match="Async Embedding API Error"): - await mock_genai_client.aio.models.embed_content( - model="text-embedding-004", - contents=["This will fail"], - ) + if stream_gen_ai_spans: + items = capture_items("transaction", "event") + + # Mock an error at the HTTP level + with mock.patch.object( + mock_genai_client._api_client, + "async_request", + side_effect=Exception("Async Embedding API Error"), + ), start_transaction(name="google_genai_embeddings_async"), pytest.raises( + Exception, match="Async Embedding API Error" + ): + await mock_genai_client.aio.models.embed_content( + model="text-embedding-004", + contents=["This will fail"], + ) + + (error_event,) = (item.payload for item in items if item.type == "event") + else: + events = capture_events() + + # Mock an error at the HTTP level + with mock.patch.object( + mock_genai_client._api_client, + "async_request", + side_effect=Exception("Async 
Embedding API Error"), + ), start_transaction(name="google_genai_embeddings_async"), pytest.raises( + Exception, match="Async Embedding API Error" + ): + await mock_genai_client.aio.models.embed_content( + model="text-embedding-004", + contents=["This will fail"], + ) - (error_event,) = (item.payload for item in items if item.type == "event") + # Should have both transaction and error events + assert len(events) == 2 + error_event, _ = events assert error_event["level"] == "error" assert error_event["exception"]["values"][0]["type"] == "Exception" @@ -1332,16 +2113,21 @@ async def test_async_embed_content_error_handling( assert error_event["exception"]["values"][0]["mechanism"]["type"] == "google_genai" +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_async_embed_content_without_statistics( - sentry_init, capture_items, mock_genai_client + sentry_init, + capture_events, + capture_items, + mock_genai_client, + stream_gen_ai_spans, ): """Test async embed_content response without statistics (older package versions).""" sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") # Response without statistics (typical for older google-genai versions) # Embeddings exist but don't have the statistics field @@ -1357,64 +2143,118 @@ async def test_async_embed_content_without_statistics( } mock_http_response = create_mock_http_response(old_version_response) - with mock.patch.object( - mock_genai_client._api_client, "async_request", return_value=mock_http_response - ): - with start_transaction(name="google_genai_embeddings_async"): + if stream_gen_ai_spans: + items = capture_items("span") + + with mock.patch.object( + mock_genai_client._api_client, + "async_request", + return_value=mock_http_response, + ), start_transaction(name="google_genai_embeddings_async"): await mock_genai_client.aio.models.embed_content( 
model="text-embedding-004", contents=["Test without statistics", "Another test"], ) - spans = [item.payload for item in items if item.type == "span"] - (embed_span,) = spans + spans = [item.payload for item in items if item.type == "span"] + (embed_span,) = spans + else: + events = capture_events() + + with mock.patch.object( + mock_genai_client._api_client, + "async_request", + return_value=mock_http_response, + ), start_transaction(name="google_genai_embeddings_async"): + await mock_genai_client.aio.models.embed_content( + model="text-embedding-004", + contents=["Test without statistics", "Another test"], + ) + + (event,) = events + (embed_span,) = event["spans"] # No usage tokens since there are no statistics in older versions # This is expected and the integration should handle it gracefully - assert SPANDATA.GEN_AI_USAGE_INPUT_TOKENS not in embed_span["attributes"] + if stream_gen_ai_spans: + assert SPANDATA.GEN_AI_USAGE_INPUT_TOKENS not in embed_span["attributes"] + else: + assert SPANDATA.GEN_AI_USAGE_INPUT_TOKENS not in embed_span["data"] +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_async_embed_content_span_origin( - sentry_init, capture_items, mock_genai_client + sentry_init, + capture_events, + capture_items, + mock_genai_client, + stream_gen_ai_spans, ): """Test that async embed_content spans have correct origin.""" sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") mock_http_response = create_mock_http_response(EXAMPLE_EMBED_RESPONSE_JSON) - with mock.patch.object( - mock_genai_client._api_client, "async_request", return_value=mock_http_response - ): - with start_transaction(name="google_genai_embeddings_async"): + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with mock.patch.object( + mock_genai_client._api_client, + "async_request", + 
return_value=mock_http_response, + ), start_transaction(name="google_genai_embeddings_async"): + await mock_genai_client.aio.models.embed_content( + model="text-embedding-004", + contents=["Test origin"], + ) + + (event,) = [item.payload for item in items if item.type == "transaction"] + assert event["contexts"]["trace"]["origin"] == "manual" + + spans = [item.payload for item in items if item.type == "span"] + for span in spans: + assert span["attributes"]["sentry.origin"] == "auto.ai.google_genai" + else: + events = capture_events() + + with mock.patch.object( + mock_genai_client._api_client, + "async_request", + return_value=mock_http_response, + ), start_transaction(name="google_genai_embeddings_async"): await mock_genai_client.aio.models.embed_content( model="text-embedding-004", contents=["Test origin"], ) - (event,) = [item.payload for item in items if item.type == "transaction"] - assert event["contexts"]["trace"]["origin"] == "manual" + (event,) = events - spans = [item.payload for item in items if item.type == "span"] - for span in spans: - assert span["attributes"]["sentry.origin"] == "auto.ai.google_genai" + assert event["contexts"]["trace"]["origin"] == "manual" + for span in event["spans"]: + assert span["origin"] == "auto.ai.google_genai" # Integration tests for generate_content with different input message formats +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_generate_content_with_content_object( - sentry_init, capture_items, mock_genai_client + sentry_init, + capture_events, + capture_items, + mock_genai_client, + stream_gen_ai_spans, ): """Test generate_content with Content object input.""" sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -1423,17 +2263,36 @@ def 
test_generate_content_with_content_object( role="user", parts=[genai_types.Part(text="Hello from Content object")] ) - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ): - with start_transaction(name="google_genai"): + if stream_gen_ai_spans: + items = capture_items("span") + + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai"): + mock_genai_client.models.generate_content( + model="gemini-1.5-flash", contents=content, config=create_test_config() + ) + + invoke_span = next(item.payload for item in items if item.type == "span") + + messages = json.loads( + invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) + else: + events = capture_events() + + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai"): mock_genai_client.models.generate_content( model="gemini-1.5-flash", contents=content, config=create_test_config() ) - invoke_span = next(item.payload for item in items if item.type == "span") + (event,) = events + invoke_span = event["spans"][0] + + messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) - messages = json.loads(invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(messages) == 1 assert messages[0]["role"] == "user" assert messages[0]["content"] == [ @@ -1441,33 +2300,57 @@ def test_generate_content_with_content_object( ] +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_generate_content_with_dict_format( - sentry_init, capture_items, mock_genai_client + sentry_init, + capture_events, + capture_items, + mock_genai_client, + stream_gen_ai_spans, ): """Test generate_content with dict format input (ContentDict).""" sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + 
_experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) # Dict format content contents = {"role": "user", "parts": [{"text": "Hello from dict format"}]} - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ): - with start_transaction(name="google_genai"): + if stream_gen_ai_spans: + items = capture_items("span") + + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai"): + mock_genai_client.models.generate_content( + model="gemini-1.5-flash", contents=contents, config=create_test_config() + ) + + invoke_span = next(item.payload for item in items if item.type == "span") + + messages = json.loads( + invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) + else: + events = capture_events() + + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai"): mock_genai_client.models.generate_content( model="gemini-1.5-flash", contents=contents, config=create_test_config() ) - invoke_span = next(item.payload for item in items if item.type == "span") + (event,) = events + invoke_span = event["spans"][0] + + messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) - messages = json.loads(invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(messages) == 1 assert messages[0]["role"] == "user" assert messages[0]["content"] == [ @@ -1475,14 +2358,21 @@ def test_generate_content_with_dict_format( ] -def test_generate_content_with_file_data(sentry_init, capture_items, mock_genai_client): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_generate_content_with_file_data( + sentry_init, + capture_events, + capture_items, + mock_genai_client, + stream_gen_ai_spans, +): """Test 
generate_content with file_data (external file reference).""" sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -1498,17 +2388,36 @@ def test_generate_content_with_file_data(sentry_init, capture_items, mock_genai_ ], ) - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ): - with start_transaction(name="google_genai"): + if stream_gen_ai_spans: + items = capture_items("span") + + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai"): + mock_genai_client.models.generate_content( + model="gemini-1.5-flash", contents=content, config=create_test_config() + ) + + invoke_span = next(item.payload for item in items if item.type == "span") + + messages = json.loads( + invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) + else: + events = capture_events() + + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai"): mock_genai_client.models.generate_content( model="gemini-1.5-flash", contents=content, config=create_test_config() ) - invoke_span = next(item.payload for item in items if item.type == "span") + (event,) = events + invoke_span = event["spans"][0] + + messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) - messages = json.loads(invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(messages) == 1 assert messages[0]["role"] == "user" assert len(messages[0]["content"]) == 2 @@ -1522,16 +2431,21 @@ def test_generate_content_with_file_data(sentry_init, capture_items, mock_genai_ assert messages[0]["content"][1]["uri"] == "gs://bucket/image.jpg" 
+@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_generate_content_with_inline_data( - sentry_init, capture_items, mock_genai_client + sentry_init, + capture_events, + capture_items, + mock_genai_client, + stream_gen_ai_spans, ): """Test generate_content with inline_data (binary data).""" sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -1546,17 +2460,36 @@ def test_generate_content_with_inline_data( ], ) - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ): - with start_transaction(name="google_genai"): + if stream_gen_ai_spans: + items = capture_items("span") + + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai"): mock_genai_client.models.generate_content( model="gemini-1.5-flash", contents=content, config=create_test_config() ) - invoke_span = next(item.payload for item in items if item.type == "span") + invoke_span = next(item.payload for item in items if item.type == "span") + + messages = json.loads( + invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) + else: + events = capture_events() + + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai"): + mock_genai_client.models.generate_content( + model="gemini-1.5-flash", contents=content, config=create_test_config() + ) + + (event,) = events + invoke_span = event["spans"][0] + + messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) - messages = json.loads(invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(messages) == 1 assert messages[0]["role"] == "user" assert 
len(messages[0]["content"]) == 2 @@ -1567,16 +2500,21 @@ def test_generate_content_with_inline_data( assert messages[0]["content"][1]["content"] == BLOB_DATA_SUBSTITUTE +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_generate_content_with_function_response( - sentry_init, capture_items, mock_genai_client + sentry_init, + capture_events, + capture_items, + mock_genai_client, + stream_gen_ai_spans, ): """Test generate_content with function_response (tool result).""" sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -1602,17 +2540,36 @@ def test_generate_content_with_function_response( ), ] - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ): - with start_transaction(name="google_genai"): + if stream_gen_ai_spans: + items = capture_items("span") + + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai"): mock_genai_client.models.generate_content( model="gemini-1.5-flash", contents=contents, config=create_test_config() ) - invoke_span = next(item.payload for item in items if item.type == "span") + invoke_span = next(item.payload for item in items if item.type == "span") + + messages = json.loads( + invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) + else: + events = capture_events() + + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai"): + mock_genai_client.models.generate_content( + model="gemini-1.5-flash", contents=contents, config=create_test_config() + ) + + (event,) = events + invoke_span = event["spans"][0] + + messages = 
json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) - messages = json.loads(invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(messages) == 1 # First message is user message assert messages[0]["role"] == "tool" @@ -1621,16 +2578,21 @@ def test_generate_content_with_function_response( assert messages[0]["content"]["output"] == "Sunny, 72F" +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_generate_content_with_mixed_string_and_content( - sentry_init, capture_items, mock_genai_client + sentry_init, + capture_events, + capture_items, + mock_genai_client, + stream_gen_ai_spans, ): """Test generate_content with mixed string and Content objects in list.""" sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -1647,57 +2609,105 @@ def test_generate_content_with_mixed_string_and_content( ), ] - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ): - with start_transaction(name="google_genai"): + if stream_gen_ai_spans: + items = capture_items("span") + + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai"): + mock_genai_client.models.generate_content( + model="gemini-1.5-flash", contents=contents, config=create_test_config() + ) + + invoke_span = next(item.payload for item in items if item.type == "span") + + messages = json.loads( + invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) + else: + events = capture_events() + + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai"): mock_genai_client.models.generate_content( 
model="gemini-1.5-flash", contents=contents, config=create_test_config() ) - invoke_span = next(item.payload for item in items if item.type == "span") + (event,) = events + invoke_span = event["spans"][0] + + messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) - messages = json.loads(invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(messages) == 1 # User message assert messages[0]["role"] == "user" assert messages[0]["content"] == [{"text": "Tell me a joke", "type": "text"}] +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_generate_content_with_part_object_directly( - sentry_init, capture_items, mock_genai_client + sentry_init, + capture_events, + capture_items, + mock_genai_client, + stream_gen_ai_spans, ): """Test generate_content with Part object directly (not wrapped in Content).""" sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) # Part object directly part = genai_types.Part(text="Direct Part object") - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ): - with start_transaction(name="google_genai"): + if stream_gen_ai_spans: + items = capture_items("span") + + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai"): + mock_genai_client.models.generate_content( + model="gemini-1.5-flash", contents=part, config=create_test_config() + ) + + invoke_span = next(item.payload for item in items if item.type == "span") + + messages = json.loads( + invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) + else: + events = capture_events() + + with mock.patch.object( + mock_genai_client._api_client, "request", 
return_value=mock_http_response + ), start_transaction(name="google_genai"): mock_genai_client.models.generate_content( model="gemini-1.5-flash", contents=part, config=create_test_config() ) - invoke_span = next(item.payload for item in items if item.type == "span") + (event,) = events + invoke_span = event["spans"][0] + + messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) - messages = json.loads(invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(messages) == 1 assert messages[0]["role"] == "user" assert messages[0]["content"] == [{"text": "Direct Part object", "type": "text"}] +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_generate_content_with_list_of_dicts( - sentry_init, capture_items, mock_genai_client + sentry_init, + capture_events, + capture_items, + mock_genai_client, + stream_gen_ai_spans, ): """ Test generate_content with list of dict format inputs. @@ -1710,8 +2720,8 @@ def test_generate_content_with_list_of_dicts( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -1722,32 +2732,56 @@ def test_generate_content_with_list_of_dicts( {"role": "user", "parts": [{"text": "Second user message"}]}, ] - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ): - with start_transaction(name="google_genai"): + if stream_gen_ai_spans: + items = capture_items("span") + + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai"): + mock_genai_client.models.generate_content( + model="gemini-1.5-flash", contents=contents, config=create_test_config() + ) + + invoke_span = next(item.payload for item in items if item.type == "span") + + 
messages = json.loads( + invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) + else: + events = capture_events() + + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai"): mock_genai_client.models.generate_content( model="gemini-1.5-flash", contents=contents, config=create_test_config() ) - invoke_span = next(item.payload for item in items if item.type == "span") + (event,) = events + invoke_span = event["spans"][0] + + messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) - messages = json.loads(invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(messages) == 1 assert messages[0]["role"] == "user" assert messages[0]["content"] == [{"text": "Second user message", "type": "text"}] +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_generate_content_with_dict_inline_data( - sentry_init, capture_items, mock_genai_client + sentry_init, + capture_events, + capture_items, + mock_genai_client, + stream_gen_ai_spans, ): """Test generate_content with dict format containing inline_data.""" sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -1760,17 +2794,36 @@ def test_generate_content_with_dict_inline_data( ], } - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ): - with start_transaction(name="google_genai"): + if stream_gen_ai_spans: + items = capture_items("span") + + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai"): + mock_genai_client.models.generate_content( + model="gemini-1.5-flash", contents=contents, 
config=create_test_config() + ) + + invoke_span = next(item.payload for item in items if item.type == "span") + + messages = json.loads( + invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) + else: + events = capture_events() + + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai"): mock_genai_client.models.generate_content( model="gemini-1.5-flash", contents=contents, config=create_test_config() ) - invoke_span = next(item.payload for item in items if item.type == "span") + (event,) = events + invoke_span = event["spans"][0] + + messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) - messages = json.loads(invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(messages) == 1 assert messages[0]["role"] == "user" assert len(messages[0]["content"]) == 2 @@ -1783,15 +2836,20 @@ def test_generate_content_with_dict_inline_data( assert messages[0]["content"][1]["content"] == BLOB_DATA_SUBSTITUTE +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_generate_content_without_parts_property_inline_data( - sentry_init, capture_items, mock_genai_client + sentry_init, + capture_events, + capture_items, + mock_genai_client, + stream_gen_ai_spans, ): sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -1800,17 +2858,35 @@ def test_generate_content_without_parts_property_inline_data( {"inline_data": {"data": b"fake_binary_data", "mime_type": "image/gif"}}, ] - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ): - with start_transaction(name="google_genai"): + if stream_gen_ai_spans: + items = capture_items("span") + + with 
mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai"): mock_genai_client.models.generate_content( model="gemini-1.5-flash", contents=contents, config=create_test_config() ) - invoke_span = next(item.payload for item in items if item.type == "span") + invoke_span = next(item.payload for item in items if item.type == "span") - messages = json.loads(invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + messages = json.loads( + invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) + else: + events = capture_events() + + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai"): + mock_genai_client.models.generate_content( + model="gemini-1.5-flash", contents=contents, config=create_test_config() + ) + + (event,) = events + invoke_span = event["spans"][0] + + messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(messages) == 1 @@ -1826,15 +2902,20 @@ def test_generate_content_without_parts_property_inline_data( assert messages[0]["content"][1]["inline_data"]["mime_type"] == "image/gif" +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_generate_content_without_parts_property_inline_data_and_binary_data_within_string( - sentry_init, capture_items, mock_genai_client + sentry_init, + capture_events, + capture_items, + mock_genai_client, + stream_gen_ai_spans, ): sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -1848,17 +2929,36 @@ def test_generate_content_without_parts_property_inline_data_and_binary_data_wit }, ] - with mock.patch.object( - mock_genai_client._api_client, "request", 
return_value=mock_http_response - ): - with start_transaction(name="google_genai"): + if stream_gen_ai_spans: + items = capture_items("span") + + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai"): mock_genai_client.models.generate_content( model="gemini-1.5-flash", contents=contents, config=create_test_config() ) - invoke_span = next(item.payload for item in items if item.type == "span") + invoke_span = next(item.payload for item in items if item.type == "span") + + messages = json.loads( + invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) + else: + events = capture_events() + + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai"): + mock_genai_client.models.generate_content( + model="gemini-1.5-flash", contents=contents, config=create_test_config() + ) + + (event,) = events + invoke_span = event["spans"][0] + + messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) - messages = json.loads(invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(messages) == 1 assert messages[0]["role"] == "user" diff --git a/tests/integrations/huggingface_hub/test_huggingface_hub.py b/tests/integrations/huggingface_hub/test_huggingface_hub.py index 3c79ca7262..d691a58c31 100644 --- a/tests/integrations/huggingface_hub/test_huggingface_hub.py +++ b/tests/integrations/huggingface_hub/test_huggingface_hub.py @@ -17,7 +17,7 @@ if TYPE_CHECKING: - from typing import Any + pass HF_VERSION = package_version("huggingface-hub") @@ -466,173 +466,327 @@ def mock_hf_chat_completion_api_streaming_tools(httpx_mock): yield rsps +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.httpx_mock(assert_all_requests_were_expected=False) @pytest.mark.parametrize("send_default_pii", [True, False]) @pytest.mark.parametrize("include_prompts", [True, 
False]) def test_text_generation( - sentry_init: "Any", - capture_items: "Any", - send_default_pii: "Any", - include_prompts: "Any", - mock_hf_text_generation_api: "Any", -) -> None: + sentry_init, + capture_events, + capture_items, + send_default_pii, + include_prompts, + mock_hf_text_generation_api, + stream_gen_ai_spans, +): sentry_init( traces_sample_rate=1.0, send_default_pii=send_default_pii, integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") client = InferenceClient(model="test-model") - with sentry_sdk.start_transaction(name="test"): - client.text_generation( - "Hello", - stream=False, - details=True, - ) + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - spans = [item.payload for item in items if item.type == "span"] - span = None - for sp in spans: - if sp["attributes"]["sentry.op"].startswith("gen_ai"): - assert span is None, "there is exactly one gen_ai span" - span = sp - else: - # there should be no other spans, just the gen_ai span - # and optionally some http.client spans from talking to the hf api - assert sp["attributes"]["sentry.op"] == "http.client" - - assert span is not None - - assert span["attributes"]["sentry.op"] == "gen_ai.text_completion" - assert span["name"] == "text_completion test-model" - assert span["attributes"]["sentry.origin"] == "auto.ai.huggingface_hub" - - expected_data = { - "gen_ai.operation.name": "text_completion", - "gen_ai.request.model": "test-model", - "gen_ai.response.finish_reasons": "length", - "gen_ai.response.streaming": False, - "gen_ai.usage.total_tokens": 10, - "sentry.environment": "production", - "sentry.op": "gen_ai.text_completion", - "sentry.origin": "auto.ai.huggingface_hub", - "sentry.release": mock.ANY, - "sentry.sdk.name": "sentry.python", - "sentry.sdk.version": mock.ANY, - "sentry.segment.id": mock.ANY, - "sentry.segment.name": "test", - 
"thread.id": mock.ANY, - "thread.name": mock.ANY, - } - - if send_default_pii and include_prompts: - expected_data["gen_ai.request.messages"] = "Hello" - expected_data["gen_ai.response.text"] = "[mocked] Hello! How can i help you?" - - if not send_default_pii or not include_prompts: - assert "gen_ai.request.messages" not in expected_data - assert "gen_ai.response.text" not in expected_data - - assert span["attributes"] == expected_data - - # text generation does not set the response model - assert "gen_ai.response.model" not in span["attributes"] + with sentry_sdk.start_transaction(name="test"): + client.text_generation( + "Hello", + stream=False, + details=True, + ) + + spans = [item.payload for item in items if item.type == "span"] + span = None + for sp in spans: + if sp["attributes"]["sentry.op"].startswith("gen_ai"): + assert span is None, "there is exactly one gen_ai span" + span = sp + else: + # there should be no other spans, just the gen_ai span + # and optionally some http.client spans from talking to the hf api + assert sp["attributes"]["sentry.op"] == "http.client" + + assert span is not None + + assert span["attributes"]["sentry.op"] == "gen_ai.text_completion" + assert span["name"] == "text_completion test-model" + assert span["attributes"]["sentry.origin"] == "auto.ai.huggingface_hub" + + expected_data = { + "gen_ai.operation.name": "text_completion", + "gen_ai.request.model": "test-model", + "gen_ai.response.finish_reasons": "length", + "gen_ai.response.streaming": False, + "gen_ai.usage.total_tokens": 10, + "sentry.environment": "production", + "sentry.op": "gen_ai.text_completion", + "sentry.origin": "auto.ai.huggingface_hub", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": mock.ANY, + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "test", + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } + + if send_default_pii and include_prompts: + expected_data["gen_ai.request.messages"] = "Hello" + 
expected_data["gen_ai.response.text"] = ( + "[mocked] Hello! How can i help you?" + ) + + if not send_default_pii or not include_prompts: + assert "gen_ai.request.messages" not in expected_data + assert "gen_ai.response.text" not in expected_data + + assert span["attributes"] == expected_data + + # text generation does not set the response model + assert "gen_ai.response.model" not in span["attributes"] + else: + events = capture_events() + + with sentry_sdk.start_transaction(name="test"): + client.text_generation( + "Hello", + stream=False, + details=True, + ) + + (transaction,) = events + + span = None + for sp in transaction["spans"]: + if sp["op"].startswith("gen_ai"): + assert span is None, "there is exactly one gen_ai span" + span = sp + else: + # there should be no other spans, just the gen_ai span + # and optionally some http.client spans from talking to the hf api + assert sp["op"] == "http.client" + + assert span is not None + + assert span["op"] == "gen_ai.text_completion" + assert span["description"] == "text_completion test-model" + assert span["origin"] == "auto.ai.huggingface_hub" + + expected_data = { + "gen_ai.operation.name": "text_completion", + "gen_ai.request.model": "test-model", + "gen_ai.response.finish_reasons": "length", + "gen_ai.response.streaming": False, + "gen_ai.usage.total_tokens": 10, + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } + + if send_default_pii and include_prompts: + expected_data["gen_ai.request.messages"] = "Hello" + expected_data["gen_ai.response.text"] = ( + "[mocked] Hello! How can i help you?" 
+ ) + if not send_default_pii or not include_prompts: + assert "gen_ai.request.messages" not in expected_data + assert "gen_ai.response.text" not in expected_data + assert span["data"] == expected_data + + # text generation does not set the response model + assert "gen_ai.response.model" not in span["data"] + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.httpx_mock(assert_all_requests_were_expected=False) @pytest.mark.parametrize("send_default_pii", [True, False]) @pytest.mark.parametrize("include_prompts", [True, False]) def test_text_generation_streaming( - sentry_init: "Any", - capture_items: "Any", - send_default_pii: "Any", - include_prompts: "Any", - mock_hf_text_generation_api_streaming: "Any", -) -> None: + sentry_init, + capture_events, + capture_items, + send_default_pii, + include_prompts, + mock_hf_text_generation_api_streaming, + stream_gen_ai_spans, +): sentry_init( traces_sample_rate=1.0, send_default_pii=send_default_pii, integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") client = InferenceClient(model="test-model") - with sentry_sdk.start_transaction(name="test"): - for _ in client.text_generation( - prompt="Hello", - stream=True, - details=True, - ): - pass - - spans = [item.payload for item in items if item.type == "span"] - span = None - for sp in spans: - if sp["attributes"]["sentry.op"].startswith("gen_ai"): - assert span is None, "there is exactly one gen_ai span" - span = sp + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with sentry_sdk.start_transaction(name="test"): + for _ in client.text_generation( + prompt="Hello", + stream=True, + details=True, + ): + pass + + spans = [item.payload for item in items if item.type == "span"] + span = None + for sp in spans: + if sp["attributes"]["sentry.op"].startswith("gen_ai"): + assert span is None, "there 
is exactly one gen_ai span" + span = sp + else: + # there should be no other spans, just the gen_ai span + # and optionally some http.client spans from talking to the hf api + assert sp["attributes"]["sentry.op"] == "http.client" + + assert span is not None + + assert span["attributes"]["sentry.op"] == "gen_ai.text_completion" + assert span["name"] == "text_completion test-model" + assert span["attributes"]["sentry.origin"] == "auto.ai.huggingface_hub" + + if stream_gen_ai_spans: + expected_data = { + "gen_ai.operation.name": "text_completion", + "gen_ai.request.model": "test-model", + "gen_ai.response.finish_reasons": "length", + "gen_ai.response.streaming": True, + "gen_ai.usage.total_tokens": 10, + "sentry.environment": "production", + "sentry.op": "gen_ai.text_completion", + "sentry.origin": "auto.ai.huggingface_hub", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": mock.ANY, + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "test", + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } else: - # there should be no other spans, just the gen_ai span - # and optionally some http.client spans from talking to the hf api - assert sp["attributes"]["sentry.op"] == "http.client" - - assert span is not None - - assert span["attributes"]["sentry.op"] == "gen_ai.text_completion" - assert span["name"] == "text_completion test-model" - assert span["attributes"]["sentry.origin"] == "auto.ai.huggingface_hub" - - expected_data = { - "gen_ai.operation.name": "text_completion", - "gen_ai.request.model": "test-model", - "gen_ai.response.finish_reasons": "length", - "gen_ai.response.streaming": True, - "gen_ai.usage.total_tokens": 10, - "sentry.environment": "production", - "sentry.op": "gen_ai.text_completion", - "sentry.origin": "auto.ai.huggingface_hub", - "sentry.release": mock.ANY, - "sentry.sdk.name": "sentry.python", - "sentry.sdk.version": mock.ANY, - "sentry.segment.id": mock.ANY, - "sentry.segment.name": "test", - 
"thread.id": mock.ANY, - "thread.name": mock.ANY, - } - - if send_default_pii and include_prompts: - expected_data["gen_ai.request.messages"] = "Hello" - expected_data["gen_ai.response.text"] = "the mocked model response" - - if not send_default_pii or not include_prompts: - assert "gen_ai.request.messages" not in expected_data - assert "gen_ai.response.text" not in expected_data - - assert span["attributes"] == expected_data - - # text generation does not set the response model - assert "gen_ai.response.model" not in span["attributes"] + expected_data = { + "gen_ai.operation.name": "text_completion", + "gen_ai.request.model": "test-model", + "gen_ai.response.finish_reasons": "length", + "gen_ai.response.streaming": True, + "gen_ai.usage.total_tokens": 10, + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } + + if send_default_pii and include_prompts: + expected_data["gen_ai.request.messages"] = "Hello" + expected_data["gen_ai.response.text"] = "the mocked model response" + + if not send_default_pii or not include_prompts: + assert "gen_ai.request.messages" not in expected_data + assert "gen_ai.response.text" not in expected_data + + assert span["attributes"] == expected_data + + # text generation does not set the response model + assert "gen_ai.response.model" not in span["attributes"] + else: + events = capture_events() + with sentry_sdk.start_transaction(name="test"): + for _ in client.text_generation( + prompt="Hello", + stream=True, + details=True, + ): + pass + + (transaction,) = events + + span = None + for sp in transaction["spans"]: + if sp["op"].startswith("gen_ai"): + assert span is None, "there is exactly one gen_ai span" + span = sp + else: + # there should be no other spans, just the gen_ai span + # and optionally some http.client spans from talking to the hf api + assert sp["op"] == "http.client" + + assert span is not None + + assert span["op"] == "gen_ai.text_completion" + assert span["description"] == "text_completion test-model" + assert 
span["origin"] == "auto.ai.huggingface_hub" + + if stream_gen_ai_spans: + expected_data = { + "gen_ai.operation.name": "text_completion", + "gen_ai.request.model": "test-model", + "gen_ai.response.finish_reasons": "length", + "gen_ai.response.streaming": True, + "gen_ai.usage.total_tokens": 10, + "sentry.environment": "production", + "sentry.op": "gen_ai.text_completion", + "sentry.origin": "auto.ai.huggingface_hub", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": mock.ANY, + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "test", + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } + else: + expected_data = { + "gen_ai.operation.name": "text_completion", + "gen_ai.request.model": "test-model", + "gen_ai.response.finish_reasons": "length", + "gen_ai.response.streaming": True, + "gen_ai.usage.total_tokens": 10, + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } + if send_default_pii and include_prompts: + expected_data["gen_ai.request.messages"] = "Hello" + expected_data["gen_ai.response.text"] = "the mocked model response" + + if not send_default_pii or not include_prompts: + assert "gen_ai.request.messages" not in expected_data + assert "gen_ai.response.text" not in expected_data + + assert span["data"] == expected_data + + # text generation does not set the response model + assert "gen_ai.response.model" not in span["data"] + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.httpx_mock(assert_all_requests_were_expected=False) @pytest.mark.parametrize("send_default_pii", [True, False]) @pytest.mark.parametrize("include_prompts", [True, False]) def test_chat_completion( - sentry_init: "Any", - capture_items: "Any", - send_default_pii: "Any", - include_prompts: "Any", - mock_hf_chat_completion_api: "Any", -) -> None: + sentry_init, + capture_events, + capture_items, + send_default_pii, + include_prompts, + mock_hf_chat_completion_api, + stream_gen_ai_spans, +): sentry_init( 
traces_sample_rate=1.0, send_default_pii=send_default_pii, integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + else: + events = capture_events() client = get_hf_provider_inference_client() @@ -642,247 +796,451 @@ def test_chat_completion( stream=False, ) - spans = [item.payload for item in items if item.type == "span"] - span = None - for sp in spans: - if sp["attributes"]["sentry.op"].startswith("gen_ai"): - assert span is None, "there is exactly one gen_ai span" - span = sp + if stream_gen_ai_spans: + spans = [item.payload for item in items if item.type == "span"] + span = None + for sp in spans: + if sp["attributes"]["sentry.op"].startswith("gen_ai"): + assert span is None, "there is exactly one gen_ai span" + span = sp + else: + # there should be no other spans, just the gen_ai span + # and optionally some http.client spans from talking to the hf api + assert sp["attributes"]["sentry.op"] == "http.client" + + assert span is not None + + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["name"] == "chat test-model" + assert span["attributes"]["sentry.origin"] == "auto.ai.huggingface_hub" + + expected_data = { + "gen_ai.operation.name": "chat", + "gen_ai.request.model": "test-model", + "gen_ai.response.finish_reasons": "stop", + "gen_ai.response.model": "test-model-123", + "gen_ai.response.streaming": False, + "gen_ai.usage.input_tokens": 10, + "gen_ai.usage.output_tokens": 8, + "gen_ai.usage.total_tokens": 18, + "sentry.environment": "production", + "sentry.op": "gen_ai.chat", + "sentry.origin": "auto.ai.huggingface_hub", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": mock.ANY, + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "test", + "thread.id": mock.ANY, + "thread.name": mock.ANY, 
+ } + + if send_default_pii and include_prompts: + expected_data["gen_ai.request.messages"] = ( + '[{"role": "user", "content": "Hello!"}]' + ) + expected_data["gen_ai.response.text"] = ( + "[mocked] Hello! How can I help you today?" + ) + + if not send_default_pii or not include_prompts: + assert "gen_ai.request.messages" not in expected_data + assert "gen_ai.response.text" not in expected_data + + assert span["attributes"] == expected_data + else: + (transaction,) = events + + span = None + for sp in transaction["spans"]: + if sp["op"].startswith("gen_ai"): + assert span is None, "there is exactly one gen_ai span" + span = sp + else: + # there should be no other spans, just the gen_ai span + # and optionally some http.client spans from talking to the hf api + assert sp["op"] == "http.client" + + assert span is not None + + if stream_gen_ai_spans: + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["name"] == "chat test-model" + assert span["attributes"]["sentry.origin"] == "auto.ai.huggingface_hub" else: - # there should be no other spans, just the gen_ai span - # and optionally some http.client spans from talking to the hf api - assert sp["attributes"]["sentry.op"] == "http.client" - - assert span is not None - - assert span["attributes"]["sentry.op"] == "gen_ai.chat" - assert span["name"] == "chat test-model" - assert span["attributes"]["sentry.origin"] == "auto.ai.huggingface_hub" - - expected_data = { - "gen_ai.operation.name": "chat", - "gen_ai.request.model": "test-model", - "gen_ai.response.finish_reasons": "stop", - "gen_ai.response.model": "test-model-123", - "gen_ai.response.streaming": False, - "gen_ai.usage.input_tokens": 10, - "gen_ai.usage.output_tokens": 8, - "gen_ai.usage.total_tokens": 18, - "sentry.environment": "production", - "sentry.op": "gen_ai.chat", - "sentry.origin": "auto.ai.huggingface_hub", - "sentry.release": mock.ANY, - "sentry.sdk.name": "sentry.python", - "sentry.sdk.version": mock.ANY, - "sentry.segment.id": 
mock.ANY, - "sentry.segment.name": "test", - "thread.id": mock.ANY, - "thread.name": mock.ANY, - } - - if send_default_pii and include_prompts: - expected_data["gen_ai.request.messages"] = ( - '[{"role": "user", "content": "Hello!"}]' - ) - expected_data["gen_ai.response.text"] = ( - "[mocked] Hello! How can I help you today?" - ) + assert span["op"] == "gen_ai.chat" + assert span["description"] == "chat test-model" + assert span["origin"] == "auto.ai.huggingface_hub" + + expected_data = { + "gen_ai.operation.name": "chat", + "gen_ai.request.model": "test-model", + "gen_ai.response.finish_reasons": "stop", + "gen_ai.response.model": "test-model-123", + "gen_ai.response.streaming": False, + "gen_ai.usage.input_tokens": 10, + "gen_ai.usage.output_tokens": 8, + "gen_ai.usage.total_tokens": 18, + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } - if not send_default_pii or not include_prompts: - assert "gen_ai.request.messages" not in expected_data - assert "gen_ai.response.text" not in expected_data + if send_default_pii and include_prompts: + expected_data["gen_ai.request.messages"] = ( + '[{"role": "user", "content": "Hello!"}]' + ) + expected_data["gen_ai.response.text"] = ( + "[mocked] Hello! How can I help you today?" 
+ ) - assert span["attributes"] == expected_data + if not send_default_pii or not include_prompts: + assert "gen_ai.request.messages" not in expected_data + assert "gen_ai.response.text" not in expected_data + assert span["data"] == expected_data + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.httpx_mock(assert_all_requests_were_expected=False) @pytest.mark.parametrize("send_default_pii", [True, False]) @pytest.mark.parametrize("include_prompts", [True, False]) def test_chat_completion_streaming( - sentry_init: "Any", - capture_items: "Any", - send_default_pii: "Any", - include_prompts: "Any", - mock_hf_chat_completion_api_streaming: "Any", -) -> None: + sentry_init, + capture_events, + capture_items, + send_default_pii, + include_prompts, + mock_hf_chat_completion_api_streaming, + stream_gen_ai_spans, +): sentry_init( traces_sample_rate=1.0, send_default_pii=send_default_pii, integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") client = get_hf_provider_inference_client() - with sentry_sdk.start_transaction(name="test"): - _ = list( - client.chat_completion( - [{"role": "user", "content": "Hello!"}], - stream=True, + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with sentry_sdk.start_transaction(name="test"): + _ = list( + client.chat_completion( + [{"role": "user", "content": "Hello!"}], + stream=True, + ) ) - ) - spans = [item.payload for item in items if item.type == "span"] - span = None - for sp in spans: - if sp["attributes"]["sentry.op"].startswith("gen_ai"): - assert span is None, "there is exactly one gen_ai span" - span = sp - else: - # there should be no other spans, just the gen_ai span - # and optionally some http.client spans from talking to the hf api - assert sp["attributes"]["sentry.op"] == "http.client" - - assert span is not None - - assert 
span["attributes"]["sentry.op"] == "gen_ai.chat" - assert span["name"] == "chat test-model" - assert span["attributes"]["sentry.origin"] == "auto.ai.huggingface_hub" - - expected_data = { - "gen_ai.operation.name": "chat", - "gen_ai.request.model": "test-model", - "gen_ai.response.finish_reasons": "stop", - "gen_ai.response.model": "test-model-123", - "gen_ai.response.streaming": True, - "sentry.environment": "production", - "sentry.op": "gen_ai.chat", - "sentry.origin": "auto.ai.huggingface_hub", - "sentry.release": mock.ANY, - "sentry.sdk.name": "sentry.python", - "sentry.sdk.version": mock.ANY, - "sentry.segment.id": mock.ANY, - "sentry.segment.name": "test", - "thread.id": mock.ANY, - "thread.name": mock.ANY, - } - # usage is not available in older versions of the library - if HF_VERSION and HF_VERSION >= (0, 26, 0): - expected_data["gen_ai.usage.input_tokens"] = 183 - expected_data["gen_ai.usage.output_tokens"] = 14 - expected_data["gen_ai.usage.total_tokens"] = 197 - - if send_default_pii and include_prompts: - expected_data["gen_ai.request.messages"] = ( - '[{"role": "user", "content": "Hello!"}]' - ) - expected_data["gen_ai.response.text"] = "the mocked model response" + spans = [item.payload for item in items if item.type == "span"] + span = None + for sp in spans: + if sp["attributes"]["sentry.op"].startswith("gen_ai"): + assert span is None, "there is exactly one gen_ai span" + span = sp + else: + # there should be no other spans, just the gen_ai span + # and optionally some http.client spans from talking to the hf api + assert sp["attributes"]["sentry.op"] == "http.client" + + assert span is not None + + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["name"] == "chat test-model" + assert span["attributes"]["sentry.origin"] == "auto.ai.huggingface_hub" + + expected_data = { + "gen_ai.operation.name": "chat", + "gen_ai.request.model": "test-model", + "gen_ai.response.finish_reasons": "stop", + "gen_ai.response.model": 
"test-model-123", + "gen_ai.response.streaming": True, + "sentry.environment": "production", + "sentry.op": "gen_ai.chat", + "sentry.origin": "auto.ai.huggingface_hub", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": mock.ANY, + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "test", + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } + + # usage is not available in older versions of the library + if HF_VERSION and HF_VERSION >= (0, 26, 0): + expected_data["gen_ai.usage.input_tokens"] = 183 + expected_data["gen_ai.usage.output_tokens"] = 14 + expected_data["gen_ai.usage.total_tokens"] = 197 + + if send_default_pii and include_prompts: + expected_data["gen_ai.request.messages"] = ( + '[{"role": "user", "content": "Hello!"}]' + ) + expected_data["gen_ai.response.text"] = "the mocked model response" - if not send_default_pii or not include_prompts: - assert "gen_ai.request.messages" not in expected_data - assert "gen_ai.response.text" not in expected_data + if not send_default_pii or not include_prompts: + assert "gen_ai.request.messages" not in expected_data + assert "gen_ai.response.text" not in expected_data - assert span["attributes"] == expected_data + assert span["attributes"] == expected_data + else: + events = capture_events() + + with sentry_sdk.start_transaction(name="test"): + _ = list( + client.chat_completion( + [{"role": "user", "content": "Hello!"}], + stream=True, + ) + ) + + (transaction,) = events + + span = None + for sp in transaction["spans"]: + if sp["op"].startswith("gen_ai"): + assert span is None, "there is exactly one gen_ai span" + span = sp + else: + # there should be no other spans, just the gen_ai span + # and optionally some http.client spans from talking to the hf api + assert sp["op"] == "http.client" + + assert span is not None + + assert span["op"] == "gen_ai.chat" + assert span["description"] == "chat test-model" + assert span["origin"] == "auto.ai.huggingface_hub" + + 
expected_data = { + "gen_ai.operation.name": "chat", + "gen_ai.request.model": "test-model", + "gen_ai.response.finish_reasons": "stop", + "gen_ai.response.model": "test-model-123", + "gen_ai.response.streaming": True, + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } + # usage is not available in older versions of the library + if HF_VERSION and HF_VERSION >= (0, 26, 0): + expected_data["gen_ai.usage.input_tokens"] = 183 + expected_data["gen_ai.usage.output_tokens"] = 14 + expected_data["gen_ai.usage.total_tokens"] = 197 + + if send_default_pii and include_prompts: + expected_data["gen_ai.request.messages"] = ( + '[{"role": "user", "content": "Hello!"}]' + ) + expected_data["gen_ai.response.text"] = "the mocked model response" + if not send_default_pii or not include_prompts: + assert "gen_ai.request.messages" not in expected_data + assert "gen_ai.response.text" not in expected_data + assert span["data"] == expected_data + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.httpx_mock(assert_all_requests_were_expected=False) def test_chat_completion_api_error( - sentry_init: "Any", capture_items: "Any", mock_hf_api_with_errors: "Any" -) -> None: - sentry_init(traces_sample_rate=1.0, _experiments={"gen_ai_as_v2_spans": True}) - items = capture_items("event", "transaction", "span") + sentry_init, + capture_events, + capture_items, + mock_hf_api_with_errors, + stream_gen_ai_spans, +): + sentry_init( + traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) client = get_hf_provider_inference_client() + if stream_gen_ai_spans: + items = capture_items("event", "transaction", "span") - with sentry_sdk.start_transaction(name="test"): - with pytest.raises(HfHubHTTPError): + with sentry_sdk.start_transaction(name="test"), pytest.raises(HfHubHTTPError): client.chat_completion( messages=[{"role": "user", "content": "Hello!"}], ) - (error,) = (item.payload for item in items if item.type == "event") - assert 
error["exception"]["values"][0]["mechanism"]["type"] == "huggingface_hub" - assert not error["exception"]["values"][0]["mechanism"]["handled"] + (error,) = (item.payload for item in items if item.type == "event") - spans = [item.payload for item in items if item.type == "span"] - span = None - for sp in spans: - if sp["attributes"]["sentry.op"].startswith("gen_ai"): - assert span is None, "there is exactly one gen_ai span" - span = sp - else: - # there should be no other spans, just the gen_ai span - # and optionally some http.client spans from talking to the hf api - assert sp["attributes"]["sentry.op"] == "http.client" + assert error["exception"]["values"][0]["mechanism"]["type"] == "huggingface_hub" + assert not error["exception"]["values"][0]["mechanism"]["handled"] - assert span is not None + spans = [item.payload for item in items if item.type == "span"] + span = None + for sp in spans: + if sp["attributes"]["sentry.op"].startswith("gen_ai"): + assert span is None, "there is exactly one gen_ai span" + span = sp + else: + # there should be no other spans, just the gen_ai span + # and optionally some http.client spans from talking to the hf api + assert sp["attributes"]["sentry.op"] == "http.client" - assert span["attributes"]["sentry.op"] == "gen_ai.chat" - assert span["name"] == "chat test-model" - assert span["attributes"]["sentry.origin"] == "auto.ai.huggingface_hub" - assert span["status"] == "error" + assert span is not None - (transaction,) = (item.payload for item in items if item.type == "transaction") - assert ( - error["contexts"]["trace"]["trace_id"] - == transaction["contexts"]["trace"]["trace_id"] - ) - expected_data = { - "gen_ai.operation.name": "chat", - "gen_ai.request.model": "test-model", - "sentry.environment": "production", - "sentry.op": "gen_ai.chat", - "sentry.origin": "auto.ai.huggingface_hub", - "sentry.release": mock.ANY, - "sentry.sdk.name": "sentry.python", - "sentry.sdk.version": mock.ANY, - "sentry.segment.id": mock.ANY, - 
"sentry.segment.name": "test", - "thread.id": mock.ANY, - "thread.name": mock.ANY, - } - assert span["attributes"] == expected_data + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["name"] == "chat test-model" + assert span["attributes"]["sentry.origin"] == "auto.ai.huggingface_hub" + assert span["status"] == "error" + (transaction,) = (item.payload for item in items if item.type == "transaction") + assert ( + error["contexts"]["trace"]["trace_id"] + == transaction["contexts"]["trace"]["trace_id"] + ) + expected_data = { + "gen_ai.operation.name": "chat", + "gen_ai.request.model": "test-model", + "sentry.environment": "production", + "sentry.op": "gen_ai.chat", + "sentry.origin": "auto.ai.huggingface_hub", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": mock.ANY, + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "test", + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } + assert span["attributes"] == expected_data + else: + events = capture_events() + + with sentry_sdk.start_transaction(name="test"), pytest.raises(HfHubHTTPError): + client.chat_completion( + messages=[{"role": "user", "content": "Hello!"}], + ) + + ( + error, + transaction, + ) = events + + assert error["exception"]["values"][0]["mechanism"]["type"] == "huggingface_hub" + assert not error["exception"]["values"][0]["mechanism"]["handled"] + + span = None + for sp in transaction["spans"]: + if sp["op"].startswith("gen_ai"): + assert span is None, "there is exactly one gen_ai span" + span = sp + else: + # there should be no other spans, just the gen_ai span + # and optionally some http.client spans from talking to the hf api + assert sp["op"] == "http.client" + + assert span is not None + + assert span["op"] == "gen_ai.chat" + assert span["description"] == "chat test-model" + assert span["origin"] == "auto.ai.huggingface_hub" + assert span["status"] == "internal_error" + assert span.get("tags", {}).get("status") == 
"internal_error" + + assert ( + error["contexts"]["trace"]["trace_id"] + == transaction["contexts"]["trace"]["trace_id"] + ) + expected_data = { + "gen_ai.operation.name": "chat", + "gen_ai.request.model": "test-model", + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } + assert span["data"] == expected_data + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.httpx_mock(assert_all_requests_were_expected=False) def test_span_status_error( - sentry_init: "Any", capture_items: "Any", mock_hf_api_with_errors: "Any" -) -> None: - sentry_init(traces_sample_rate=1.0, _experiments={"gen_ai_as_v2_spans": True}) - items = capture_items("event", "transaction", "span") - + sentry_init, + capture_events, + capture_items, + mock_hf_api_with_errors, + stream_gen_ai_spans, +): client = get_hf_provider_inference_client() - with sentry_sdk.start_transaction(name="test"): - with pytest.raises(HfHubHTTPError): + sentry_init( + traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) + + if stream_gen_ai_spans: + items = capture_items("event", "transaction", "span") + + with sentry_sdk.start_transaction(name="test"), pytest.raises(HfHubHTTPError): client.chat_completion( messages=[{"role": "user", "content": "Hello!"}], ) - (error,) = [item.payload for item in items if item.type == "event"] - assert error["level"] == "error" + (error,) = [item.payload for item in items if item.type == "event"] + assert error["level"] == "error" + + spans = [item.payload for item in items if item.type == "span"] + span = None + for sp in spans: + if sp["attributes"]["sentry.op"].startswith("gen_ai"): + assert span is None, "there is exactly one gen_ai span" + span = sp + else: + # there should be no other spans, just the gen_ai span + # and optionally some http.client spans from talking to the hf api + assert sp["attributes"]["sentry.op"] == "http.client" + + assert span is not None + assert span["status"] == "error" + else: + events = 
capture_events() - spans = [item.payload for item in items if item.type == "span"] - span = None - for sp in spans: - if sp["attributes"]["sentry.op"].startswith("gen_ai"): - assert span is None, "there is exactly one gen_ai span" - span = sp - else: - # there should be no other spans, just the gen_ai span - # and optionally some http.client spans from talking to the hf api - assert sp["attributes"]["sentry.op"] == "http.client" + with sentry_sdk.start_transaction(name="test"), pytest.raises(HfHubHTTPError): + client.chat_completion( + messages=[{"role": "user", "content": "Hello!"}], + ) + + (error, transaction) = events + assert error["level"] == "error" + + span = None + for sp in transaction["spans"]: + if sp["op"].startswith("gen_ai"): + assert span is None, "there is exactly one gen_ai span" + span = sp + else: + # there should be no other spans, just the gen_ai span + # and optionally some http.client spans from talking to the hf api + assert sp["op"] == "http.client" - assert span is not None - assert span["status"] == "error" + assert span is not None + assert span["status"] == "internal_error" + assert span["tags"]["status"] == "internal_error" +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.httpx_mock(assert_all_requests_were_expected=False) @pytest.mark.parametrize("send_default_pii", [True, False]) @pytest.mark.parametrize("include_prompts", [True, False]) def test_chat_completion_with_tools( - sentry_init: "Any", - capture_items: "Any", - send_default_pii: "Any", - include_prompts: "Any", - mock_hf_chat_completion_api_tools: "Any", -) -> None: + sentry_init, + capture_events, + capture_items, + send_default_pii, + include_prompts, + mock_hf_chat_completion_api_tools, + stream_gen_ai_spans, +): sentry_init( traces_sample_rate=1.0, send_default_pii=send_default_pii, integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = 
capture_items("transaction", "span") client = get_hf_provider_inference_client() @@ -901,83 +1259,148 @@ def test_chat_completion_with_tools( } ] - with sentry_sdk.start_transaction(name="test"): - client.chat_completion( - messages=[{"role": "user", "content": "What is the weather in Paris?"}], - tools=tools, - tool_choice="auto", - ) + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - spans = [item.payload for item in items if item.type == "span"] - span = None - for sp in spans: - if sp["attributes"]["sentry.op"].startswith("gen_ai"): - assert span is None, "there is exactly one gen_ai span" - span = sp - else: - # there should be no other spans, just the gen_ai span - # and optionally some http.client spans from talking to the hf api - assert sp["attributes"]["sentry.op"] == "http.client" - - assert span is not None - - assert span["attributes"]["sentry.op"] == "gen_ai.chat" - assert span["name"] == "chat test-model" - assert span["attributes"]["sentry.origin"] == "auto.ai.huggingface_hub" - - expected_data = { - "gen_ai.operation.name": "chat", - "gen_ai.request.available_tools": '[{"type": "function", "function": {"name": "get_weather", "description": "Get current weather", "parameters": {"type": "object", "properties": {"location": {"type": "string"}}, "required": ["location"]}}}]', - "gen_ai.request.model": "test-model", - "gen_ai.response.finish_reasons": "tool_calls", - "gen_ai.response.model": "test-model-123", - "gen_ai.usage.input_tokens": 10, - "gen_ai.usage.output_tokens": 8, - "gen_ai.usage.total_tokens": 18, - "sentry.environment": "production", - "sentry.op": "gen_ai.chat", - "sentry.origin": "auto.ai.huggingface_hub", - "sentry.release": mock.ANY, - "sentry.sdk.name": "sentry.python", - "sentry.sdk.version": mock.ANY, - "sentry.segment.id": mock.ANY, - "sentry.segment.name": "test", - "thread.id": mock.ANY, - "thread.name": mock.ANY, - } - - if send_default_pii and include_prompts: - expected_data["gen_ai.request.messages"] 
= ( - '[{"role": "user", "content": "What is the weather in Paris?"}]' - ) - expected_data["gen_ai.response.tool_calls"] = ( - '[{"function": {"arguments": {"location": "Paris"}, "name": "get_weather", "description": "None"}, "id": "call_123", "type": "function"}]' - ) + with sentry_sdk.start_transaction(name="test"): + client.chat_completion( + messages=[{"role": "user", "content": "What is the weather in Paris?"}], + tools=tools, + tool_choice="auto", + ) + + spans = [item.payload for item in items if item.type == "span"] + span = None + for sp in spans: + if sp["attributes"]["sentry.op"].startswith("gen_ai"): + assert span is None, "there is exactly one gen_ai span" + span = sp + else: + # there should be no other spans, just the gen_ai span + # and optionally some http.client spans from talking to the hf api + assert sp["attributes"]["sentry.op"] == "http.client" + + assert span is not None + + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["name"] == "chat test-model" + assert span["attributes"]["sentry.origin"] == "auto.ai.huggingface_hub" + + expected_data = { + "gen_ai.operation.name": "chat", + "gen_ai.request.available_tools": '[{"type": "function", "function": {"name": "get_weather", "description": "Get current weather", "parameters": {"type": "object", "properties": {"location": {"type": "string"}}, "required": ["location"]}}}]', + "gen_ai.request.model": "test-model", + "gen_ai.response.finish_reasons": "tool_calls", + "gen_ai.response.model": "test-model-123", + "gen_ai.usage.input_tokens": 10, + "gen_ai.usage.output_tokens": 8, + "gen_ai.usage.total_tokens": 18, + "sentry.environment": "production", + "sentry.op": "gen_ai.chat", + "sentry.origin": "auto.ai.huggingface_hub", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": mock.ANY, + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "test", + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } + + if send_default_pii and 
include_prompts: + expected_data["gen_ai.request.messages"] = ( + '[{"role": "user", "content": "What is the weather in Paris?"}]' + ) + expected_data["gen_ai.response.tool_calls"] = ( + '[{"function": {"arguments": {"location": "Paris"}, "name": "get_weather", "description": "None"}, "id": "call_123", "type": "function"}]' + ) + + if not send_default_pii or not include_prompts: + assert "gen_ai.request.messages" not in expected_data + assert "gen_ai.response.text" not in expected_data + assert "gen_ai.response.tool_calls" not in expected_data + + assert span["attributes"] == expected_data + else: + events = capture_events() + + with sentry_sdk.start_transaction(name="test"): + client.chat_completion( + messages=[{"role": "user", "content": "What is the weather in Paris?"}], + tools=tools, + tool_choice="auto", + ) + + (transaction,) = events + + span = None + for sp in transaction["spans"]: + if sp["op"].startswith("gen_ai"): + assert span is None, "there is exactly one gen_ai span" + span = sp + else: + # there should be no other spans, just the gen_ai span + # and optionally some http.client spans from talking to the hf api + assert sp["op"] == "http.client" + + assert span is not None + + assert span["op"] == "gen_ai.chat" + assert span["description"] == "chat test-model" + assert span["origin"] == "auto.ai.huggingface_hub" + + expected_data = { + "gen_ai.operation.name": "chat", + "gen_ai.request.available_tools": '[{"type": "function", "function": {"name": "get_weather", "description": "Get current weather", "parameters": {"type": "object", "properties": {"location": {"type": "string"}}, "required": ["location"]}}}]', + "gen_ai.request.model": "test-model", + "gen_ai.response.finish_reasons": "tool_calls", + "gen_ai.response.model": "test-model-123", + "gen_ai.usage.input_tokens": 10, + "gen_ai.usage.output_tokens": 8, + "gen_ai.usage.total_tokens": 18, + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } + + if send_default_pii and include_prompts: + 
expected_data["gen_ai.request.messages"] = ( + '[{"role": "user", "content": "What is the weather in Paris?"}]' + ) + expected_data["gen_ai.response.tool_calls"] = ( + '[{"function": {"arguments": {"location": "Paris"}, "name": "get_weather", "description": "None"}, "id": "call_123", "type": "function"}]' + ) - if not send_default_pii or not include_prompts: - assert "gen_ai.request.messages" not in expected_data - assert "gen_ai.response.text" not in expected_data - assert "gen_ai.response.tool_calls" not in expected_data + if not send_default_pii or not include_prompts: + assert "gen_ai.request.messages" not in expected_data + assert "gen_ai.response.text" not in expected_data + assert "gen_ai.response.tool_calls" not in expected_data - assert span["attributes"] == expected_data + assert span["data"] == expected_data +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.httpx_mock(assert_all_requests_were_expected=False) @pytest.mark.parametrize("send_default_pii", [True, False]) @pytest.mark.parametrize("include_prompts", [True, False]) def test_chat_completion_streaming_with_tools( - sentry_init: "Any", - capture_items: "Any", - send_default_pii: "Any", - include_prompts: "Any", - mock_hf_chat_completion_api_streaming_tools: "Any", -) -> None: + sentry_init, + capture_events, + capture_items, + send_default_pii, + include_prompts, + mock_hf_chat_completion_api_streaming_tools, + stream_gen_ai_spans, +): sentry_init( traces_sample_rate=1.0, send_default_pii=send_default_pii, integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + else: + events = capture_events() client = get_hf_provider_inference_client() @@ -1006,59 +1429,110 @@ def test_chat_completion_streaming_with_tools( ) ) - spans = [item.payload for item in items if item.type == 
"span"] - span = None - for sp in spans: - if sp["attributes"]["sentry.op"].startswith("gen_ai"): - assert span is None, "there is exactly one gen_ai span" - span = sp - else: - # there should be no other spans, just the gen_ai span - # and optionally some http.client spans from talking to the hf api - assert sp["attributes"]["sentry.op"] == "http.client" - - assert span is not None - - assert span["attributes"]["sentry.op"] == "gen_ai.chat" - assert span["name"] == "chat test-model" - assert span["attributes"]["sentry.origin"] == "auto.ai.huggingface_hub" - - expected_data = { - "gen_ai.operation.name": "chat", - "gen_ai.request.available_tools": '[{"type": "function", "function": {"name": "get_weather", "description": "Get current weather", "parameters": {"type": "object", "properties": {"location": {"type": "string"}}, "required": ["location"]}}}]', - "gen_ai.request.model": "test-model", - "gen_ai.response.finish_reasons": "tool_calls", - "gen_ai.response.model": "test-model-123", - "gen_ai.response.streaming": True, - "sentry.environment": "production", - "sentry.op": "gen_ai.chat", - "sentry.origin": "auto.ai.huggingface_hub", - "sentry.release": mock.ANY, - "sentry.sdk.name": "sentry.python", - "sentry.sdk.version": mock.ANY, - "sentry.segment.id": mock.ANY, - "sentry.segment.name": "test", - "thread.id": mock.ANY, - "thread.name": mock.ANY, - } - - if HF_VERSION and HF_VERSION >= (0, 26, 0): - expected_data["gen_ai.usage.input_tokens"] = 183 - expected_data["gen_ai.usage.output_tokens"] = 14 - expected_data["gen_ai.usage.total_tokens"] = 197 - - if send_default_pii and include_prompts: - expected_data["gen_ai.request.messages"] = ( - '[{"role": "user", "content": "What is the weather in Paris?"}]' - ) - expected_data["gen_ai.response.text"] = "response with tool calls follows" - expected_data["gen_ai.response.tool_calls"] = ( - '[{"function": {"arguments": {"location": "Paris"}, "name": "get_weather"}, "id": "call_123", "type": "function", "index": 
"None"}]' - ) + if stream_gen_ai_spans: + spans = [item.payload for item in items if item.type == "span"] + span = None + for sp in spans: + if sp["attributes"]["sentry.op"].startswith("gen_ai"): + assert span is None, "there is exactly one gen_ai span" + span = sp + else: + # there should be no other spans, just the gen_ai span + # and optionally some http.client spans from talking to the hf api + assert sp["attributes"]["sentry.op"] == "http.client" + + assert span is not None + + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["name"] == "chat test-model" + assert span["attributes"]["sentry.origin"] == "auto.ai.huggingface_hub" + + expected_data = { + "gen_ai.operation.name": "chat", + "gen_ai.request.available_tools": '[{"type": "function", "function": {"name": "get_weather", "description": "Get current weather", "parameters": {"type": "object", "properties": {"location": {"type": "string"}}, "required": ["location"]}}}]', + "gen_ai.request.model": "test-model", + "gen_ai.response.finish_reasons": "tool_calls", + "gen_ai.response.model": "test-model-123", + "gen_ai.response.streaming": True, + "sentry.environment": "production", + "sentry.op": "gen_ai.chat", + "sentry.origin": "auto.ai.huggingface_hub", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": mock.ANY, + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "test", + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } + + if HF_VERSION and HF_VERSION >= (0, 26, 0): + expected_data["gen_ai.usage.input_tokens"] = 183 + expected_data["gen_ai.usage.output_tokens"] = 14 + expected_data["gen_ai.usage.total_tokens"] = 197 + + if send_default_pii and include_prompts: + expected_data["gen_ai.request.messages"] = ( + '[{"role": "user", "content": "What is the weather in Paris?"}]' + ) + expected_data["gen_ai.response.text"] = "response with tool calls follows" + expected_data["gen_ai.response.tool_calls"] = ( + '[{"function": {"arguments": 
{"location": "Paris"}, "name": "get_weather"}, "id": "call_123", "type": "function", "index": "None"}]' + ) + + if not send_default_pii or not include_prompts: + assert "gen_ai.request.messages" not in expected_data + assert "gen_ai.response.text" not in expected_data + assert "gen_ai.response.tool_calls" not in expected_data + + assert span["attributes"] == expected_data + else: + (transaction,) = events + + span = None + for sp in transaction["spans"]: + if sp["op"].startswith("gen_ai"): + assert span is None, "there is exactly one gen_ai span" + span = sp + else: + # there should be no other spans, just the gen_ai span + # and optionally some http.client spans from talking to the hf api + assert sp["op"] == "http.client" + + assert span is not None + + assert span["op"] == "gen_ai.chat" + assert span["description"] == "chat test-model" + assert span["origin"] == "auto.ai.huggingface_hub" + + expected_data = { + "gen_ai.operation.name": "chat", + "gen_ai.request.available_tools": '[{"type": "function", "function": {"name": "get_weather", "description": "Get current weather", "parameters": {"type": "object", "properties": {"location": {"type": "string"}}, "required": ["location"]}}}]', + "gen_ai.request.model": "test-model", + "gen_ai.response.finish_reasons": "tool_calls", + "gen_ai.response.model": "test-model-123", + "gen_ai.response.streaming": True, + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } + + if HF_VERSION and HF_VERSION >= (0, 26, 0): + expected_data["gen_ai.usage.input_tokens"] = 183 + expected_data["gen_ai.usage.output_tokens"] = 14 + expected_data["gen_ai.usage.total_tokens"] = 197 + + if send_default_pii and include_prompts: + expected_data["gen_ai.request.messages"] = ( + '[{"role": "user", "content": "What is the weather in Paris?"}]' + ) + expected_data["gen_ai.response.text"] = "response with tool calls follows" + expected_data["gen_ai.response.tool_calls"] = ( + '[{"function": {"arguments": {"location": "Paris"}, "name": 
"get_weather"}, "id": "call_123", "type": "function", "index": "None"}]' + ) - if not send_default_pii or not include_prompts: - assert "gen_ai.request.messages" not in expected_data - assert "gen_ai.response.text" not in expected_data - assert "gen_ai.response.tool_calls" not in expected_data + if not send_default_pii or not include_prompts: + assert "gen_ai.request.messages" not in expected_data + assert "gen_ai.response.text" not in expected_data + assert "gen_ai.response.tool_calls" not in expected_data - assert span["attributes"] == expected_data + assert span["data"] == expected_data diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index 5c700180cd..7adb2d13c5 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -257,10 +257,13 @@ def _llm_type(self) -> str: return llm_type +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_langchain_text_completion( sentry_init, + capture_events, capture_items, get_model_response, + stream_gen_ai_spans, ): sentry_init( integrations=[ @@ -270,8 +273,8 @@ def test_langchain_text_completion( ], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") model_response = get_model_response( Completion( @@ -302,45 +305,87 @@ def test_langchain_text_completion( openai_api_key="badkey", ) - with patch.object( - model.client._client._client, - "send", - return_value=model_response, - ) as _: - with start_transaction(): + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with patch.object( + model.client._client._client, + "send", + return_value=model_response, + ) as _, start_transaction(): input_text = "What is the capital of France?" 
model.invoke(input_text, config={"run_name": "my-snazzy-pipeline"}) - tx = next(item.payload for item in items if item.type == "transaction") - assert tx["type"] == "transaction" + tx = next(item.payload for item in items if item.type == "transaction") + assert tx["type"] == "transaction" - spans = [item.payload for item in items if item.type == "span"] - llm_spans = [ - span - for span in spans - if span["attributes"].get("sentry.op") == "gen_ai.text_completion" - ] - assert len(llm_spans) > 0 + spans = [item.payload for item in items if item.type == "span"] + llm_spans = [ + span + for span in spans + if span["attributes"].get("sentry.op") == "gen_ai.text_completion" + ] - llm_span = llm_spans[0] - assert llm_span["name"] == "text_completion gpt-3.5-turbo" - assert llm_span["attributes"]["gen_ai.system"] == "openai" - assert llm_span["attributes"]["gen_ai.function_id"] == "my-snazzy-pipeline" - assert llm_span["attributes"]["gen_ai.request.model"] == "gpt-3.5-turbo" - assert ( - llm_span["attributes"]["gen_ai.response.text"] - == "The capital of France is Paris." - ) - assert llm_span["attributes"]["gen_ai.usage.total_tokens"] == 25 - assert llm_span["attributes"]["gen_ai.usage.input_tokens"] == 10 - assert llm_span["attributes"]["gen_ai.usage.output_tokens"] == 15 + assert len(llm_spans) > 0 + + llm_span = llm_spans[0] + + assert llm_span["name"] == "text_completion gpt-3.5-turbo" + assert llm_span["attributes"]["gen_ai.system"] == "openai" + assert llm_span["attributes"]["gen_ai.function_id"] == "my-snazzy-pipeline" + assert llm_span["attributes"]["gen_ai.request.model"] == "gpt-3.5-turbo" + assert ( + llm_span["attributes"]["gen_ai.response.text"] + == "The capital of France is Paris." 
+ ) + assert llm_span["attributes"]["gen_ai.usage.total_tokens"] == 25 + assert llm_span["attributes"]["gen_ai.usage.input_tokens"] == 10 + assert llm_span["attributes"]["gen_ai.usage.output_tokens"] == 15 + else: + events = capture_events() + + with patch.object( + model.client._client._client, + "send", + return_value=model_response, + ) as _, start_transaction(): + input_text = "What is the capital of France?" + model.invoke(input_text, config={"run_name": "my-snazzy-pipeline"}) + + tx = events[0] + assert tx["type"] == "transaction" + + llm_spans = [ + span + for span in tx.get("spans", []) + if span.get("op") == "gen_ai.text_completion" + ] + assert len(llm_spans) > 0 + llm_span = llm_spans[0] + + assert llm_span["description"] == "text_completion gpt-3.5-turbo" + assert llm_span["data"]["gen_ai.system"] == "openai" + assert llm_span["data"]["gen_ai.function_id"] == "my-snazzy-pipeline" + assert llm_span["data"]["gen_ai.request.model"] == "gpt-3.5-turbo" + assert ( + llm_span["data"]["gen_ai.response.text"] + == "The capital of France is Paris." 
+ ) + assert llm_span["data"]["gen_ai.usage.total_tokens"] == 25 + assert llm_span["data"]["gen_ai.usage.input_tokens"] == 10 + assert llm_span["data"]["gen_ai.usage.output_tokens"] == 15 + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_langchain_chat_with_run_name( sentry_init, + capture_events, capture_items, get_model_response, nonstreaming_chat_completions_model_response, + stream_gen_ai_spans, ): sentry_init( integrations=[ @@ -350,8 +395,8 @@ def test_langchain_chat_with_run_name( ], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") request_headers = {} # Changed in https://github.com/langchain-ai/langchain/pull/32655 @@ -380,28 +425,56 @@ def test_langchain_chat_with_run_name( openai_api_key="badkey", ) - with patch.object( - llm.client._client._client, - "send", - return_value=model_response, - ) as _: - with start_transaction(): + if stream_gen_ai_spans: + items = capture_items("span") + + with patch.object( + llm.client._client._client, + "send", + return_value=model_response, + ) as _, start_transaction(): llm.invoke( "How many letters in the word eudca", config={"run_name": "my-snazzy-pipeline"}, ) - spans = [item.payload for item in items if item.type == "span"] - chat_spans = list(x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.chat") - assert len(chat_spans) == 1 - assert ( - chat_spans[0]["attributes"][SPANDATA.GEN_AI_FUNCTION_ID] == "my-snazzy-pipeline" - ) + spans = [item.payload for item in items if item.type == "span"] + chat_spans = list( + x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.chat" + ) + assert len(chat_spans) == 1 + assert ( + chat_spans[0]["attributes"][SPANDATA.GEN_AI_FUNCTION_ID] + == "my-snazzy-pipeline" + ) + else: + events = capture_events() + + with patch.object( + llm.client._client._client, + "send", + return_value=model_response, + ) as _, start_transaction(): + llm.invoke( + "How 
many letters in the word eudca", + config={"run_name": "my-snazzy-pipeline"}, + ) + + tx = events[0] + + chat_spans = list(x for x in tx["spans"] if x["op"] == "gen_ai.chat") + assert len(chat_spans) == 1 + assert ( + chat_spans[0]["data"][SPANDATA.GEN_AI_FUNCTION_ID] == "my-snazzy-pipeline" + ) +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_langchain_tool_call_with_run_name( sentry_init, + capture_events, capture_items, + stream_gen_ai_spans, ): sentry_init( integrations=[ @@ -411,25 +484,44 @@ def test_langchain_tool_call_with_run_name( ], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") + if stream_gen_ai_spans: + items = capture_items("span") + + with start_transaction(): + get_word_length.invoke( + {"word": "eudca"}, + config={"run_name": "my-snazzy-pipeline"}, + ) - with start_transaction(): - get_word_length.invoke( - {"word": "eudca"}, - config={"run_name": "my-snazzy-pipeline"}, + spans = [item.payload for item in items if item.type == "span"] + tool_spans = list( + x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.execute_tool" + ) + assert len(tool_spans) == 1 + assert ( + tool_spans[0]["attributes"][SPANDATA.GEN_AI_FUNCTION_ID] + == "my-snazzy-pipeline" ) + else: + events = capture_events() - spans = [item.payload for item in items if item.type == "span"] - tool_spans = list( - x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.execute_tool" - ) - assert len(tool_spans) == 1 - assert ( - tool_spans[0]["attributes"][SPANDATA.GEN_AI_FUNCTION_ID] == "my-snazzy-pipeline" - ) + with start_transaction(): + get_word_length.invoke( + {"word": "eudca"}, + config={"run_name": "my-snazzy-pipeline"}, + ) + + tx = events[0] + tool_spans = list(x for x in tx["spans"] if x["op"] == "gen_ai.execute_tool") + assert len(tool_spans) == 1 + assert ( + tool_spans[0]["data"][SPANDATA.GEN_AI_FUNCTION_ID] == "my-snazzy-pipeline" + ) 
+@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.skipif( LANGCHAIN_VERSION < (1,), reason="LangChain 1.0+ required (ONE AGENT refactor)", @@ -456,6 +548,7 @@ def test_langchain_tool_call_with_run_name( ) def test_langchain_create_agent( sentry_init, + capture_events, capture_items, send_default_pii, include_prompts, @@ -463,6 +556,7 @@ def test_langchain_create_agent( request, get_model_response, nonstreaming_responses_model_response, + stream_gen_ai_spans, ): sentry_init( integrations=[ @@ -472,8 +566,8 @@ def test_langchain_create_agent( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") model_response = get_model_response( nonstreaming_responses_model_response, @@ -496,12 +590,14 @@ def test_langchain_create_agent( name="word_length_agent", ) - with patch.object( - llm.client._client._client, - "send", - return_value=model_response, - ) as _: - with start_transaction(): + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with patch.object( + llm.client._client._client, + "send", + return_value=model_response, + ) as _, start_transaction(): agent.invoke( { "messages": [ @@ -510,61 +606,135 @@ def test_langchain_create_agent( }, ) - tx = next(item.payload for item in items if item.type == "transaction") - assert tx["type"] == "transaction" - assert tx["contexts"]["trace"]["origin"] == "manual" + tx = next(item.payload for item in items if item.type == "transaction") - spans = [item.payload for item in items if item.type == "span"] - chat_spans = list(x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.chat") - assert len(chat_spans) == 1 - assert chat_spans[0]["attributes"]["sentry.origin"] == "auto.ai.langchain" + assert tx["type"] == "transaction" + assert tx["contexts"]["trace"]["origin"] == "manual" - assert chat_spans[0]["attributes"]["gen_ai.system"] == "openai-chat" - assert 
chat_spans[0]["attributes"]["gen_ai.agent.name"] == "word_length_agent" + spans = [item.payload for item in items if item.type == "span"] + chat_spans = list( + x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.chat" + ) + assert len(chat_spans) == 1 + assert chat_spans[0]["attributes"]["sentry.origin"] == "auto.ai.langchain" - assert chat_spans[0]["attributes"]["gen_ai.usage.input_tokens"] == 10 - assert chat_spans[0]["attributes"]["gen_ai.usage.output_tokens"] == 20 - assert chat_spans[0]["attributes"]["gen_ai.usage.total_tokens"] == 30 + assert chat_spans[0]["attributes"]["gen_ai.system"] == "openai-chat" + assert chat_spans[0]["attributes"]["gen_ai.agent.name"] == "word_length_agent" - if send_default_pii and include_prompts: - assert ( - chat_spans[0]["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] - == "Hello, how can I help you?" - ) + assert chat_spans[0]["attributes"]["gen_ai.usage.input_tokens"] == 10 + assert chat_spans[0]["attributes"]["gen_ai.usage.output_tokens"] == 20 + assert chat_spans[0]["attributes"]["gen_ai.usage.total_tokens"] == 30 - param_id = request.node.callspec.id - if "string" in param_id: - assert [ - { - "type": "text", - "content": "You are very powerful assistant, but don't know current events", - } - ] == json.loads( - chat_spans[0]["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + if send_default_pii and include_prompts: + assert ( + chat_spans[0]["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] + == "Hello, how can I help you?" 
) + + param_id = request.node.callspec.id + if "string" in param_id: + assert [ + { + "type": "text", + "content": "You are very powerful assistant, but don't know current events", + } + ] == json.loads( + chat_spans[0]["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + ) + else: + assert [ + { + "type": "text", + "content": "You are a helpful assistant.", + }, + { + "type": "text", + "content": "Be concise and clear.", + }, + ] == json.loads( + chat_spans[0]["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + ) else: - assert [ - { - "type": "text", - "content": "You are a helpful assistant.", - }, + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_spans[0].get( + "attributes", {} + ) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[0].get( + "attributes", {} + ) + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[0].get( + "attributes", {} + ) + + else: + events = capture_events() + + with patch.object( + llm.client._client._client, + "send", + return_value=model_response, + ) as _, start_transaction(): + agent.invoke( { - "type": "text", - "content": "Be concise and clear.", + "messages": [ + HumanMessage(content="How many letters in the word eudca"), + ], }, - ] == json.loads( - chat_spans[0]["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] ) - else: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_spans[0].get( - "attributes", {} - ) - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[0].get( - "attributes", {} - ) - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[0].get("attributes", {}) + + tx = events[0] + + assert tx["type"] == "transaction" + assert tx["contexts"]["trace"]["origin"] == "manual" + + chat_spans = list(x for x in tx["spans"] if x["op"] == "gen_ai.chat") + assert len(chat_spans) == 1 + assert chat_spans[0]["origin"] == "auto.ai.langchain" + + assert chat_spans[0]["data"]["gen_ai.system"] == "openai-chat" + assert chat_spans[0]["data"]["gen_ai.agent.name"] == "word_length_agent" + + assert 
chat_spans[0]["data"]["gen_ai.usage.input_tokens"] == 10 + assert chat_spans[0]["data"]["gen_ai.usage.output_tokens"] == 20 + assert chat_spans[0]["data"]["gen_ai.usage.total_tokens"] == 30 + + if send_default_pii and include_prompts: + assert ( + chat_spans[0]["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + == "Hello, how can I help you?" + ) + + param_id = request.node.callspec.id + if "string" in param_id: + assert [ + { + "type": "text", + "content": "You are very powerful assistant, but don't know current events", + } + ] == json.loads( + chat_spans[0]["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + ) + else: + assert [ + { + "type": "text", + "content": "You are a helpful assistant.", + }, + { + "type": "text", + "content": "Be concise and clear.", + }, + ] == json.loads( + chat_spans[0]["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + ) + else: + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_spans[0].get( + "data", {} + ) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[0].get("data", {}) + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[0].get("data", {}) +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.skipif( LANGCHAIN_VERSION < (1,), reason="LangChain 1.0+ required (ONE AGENT refactor)", @@ -580,11 +750,13 @@ def test_langchain_create_agent( ) def test_tool_execution_span( sentry_init, + capture_events, capture_items, send_default_pii, include_prompts, get_model_response, responses_tool_call_model_responses, + stream_gen_ai_spans, ): sentry_init( integrations=[ @@ -594,8 +766,8 @@ def test_tool_execution_span( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") responses = responses_tool_call_model_responses( tool_name="get_word_length", @@ -657,12 +829,14 @@ def test_tool_execution_span( name="word_length_agent", ) - with patch.object( - llm.client._client._client, - "send", - 
side_effect=[tool_response, final_response], - ) as _: - with start_transaction(): + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with patch.object( + llm.client._client._client, + "send", + side_effect=[tool_response, final_response], + ) as _, start_transaction(): agent.invoke( { "messages": [ @@ -671,86 +845,190 @@ def test_tool_execution_span( }, ) - tx = next(item.payload for item in items if item.type == "transaction") - assert tx["type"] == "transaction" - assert tx["contexts"]["trace"]["origin"] == "manual" + tx = next(item.payload for item in items if item.type == "transaction") - spans = [item.payload for item in items if item.type == "span"] - chat_spans = list(x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.chat") - assert len(chat_spans) == 2 + assert tx["type"] == "transaction" + assert tx["contexts"]["trace"]["origin"] == "manual" - tool_exec_spans = list( - x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.execute_tool" - ) - assert len(tool_exec_spans) == 1 - tool_exec_span = tool_exec_spans[0] + spans = [item.payload for item in items if item.type == "span"] + chat_spans = list( + x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.chat" + ) + assert len(chat_spans) == 2 - assert chat_spans[0]["attributes"]["sentry.origin"] == "auto.ai.langchain" - assert chat_spans[1]["attributes"]["sentry.origin"] == "auto.ai.langchain" - assert tool_exec_span["attributes"]["sentry.origin"] == "auto.ai.langchain" + tool_exec_spans = list( + x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.execute_tool" + ) - assert chat_spans[0]["attributes"]["gen_ai.agent.name"] == "word_length_agent" - assert chat_spans[1]["attributes"]["gen_ai.agent.name"] == "word_length_agent" - assert tool_exec_span["attributes"]["gen_ai.agent.name"] == "word_length_agent" + assert len(tool_exec_spans) == 1 + tool_exec_span = tool_exec_spans[0] - assert chat_spans[0]["attributes"]["gen_ai.usage.input_tokens"] == 142 - 
assert chat_spans[0]["attributes"]["gen_ai.usage.output_tokens"] == 50 - assert chat_spans[0]["attributes"]["gen_ai.usage.total_tokens"] == 192 - assert chat_spans[0]["attributes"]["gen_ai.system"] == "openai-chat" + assert chat_spans[0]["attributes"]["sentry.origin"] == "auto.ai.langchain" + assert chat_spans[1]["attributes"]["sentry.origin"] == "auto.ai.langchain" + assert tool_exec_span["attributes"]["sentry.origin"] == "auto.ai.langchain" - assert chat_spans[1]["attributes"]["gen_ai.usage.input_tokens"] == 89 - assert chat_spans[1]["attributes"]["gen_ai.usage.output_tokens"] == 28 - assert chat_spans[1]["attributes"]["gen_ai.usage.total_tokens"] == 117 - assert chat_spans[1]["attributes"]["gen_ai.system"] == "openai-chat" + assert chat_spans[0]["attributes"]["gen_ai.agent.name"] == "word_length_agent" + assert chat_spans[1]["attributes"]["gen_ai.agent.name"] == "word_length_agent" + assert tool_exec_span["attributes"]["gen_ai.agent.name"] == "word_length_agent" - if send_default_pii and include_prompts: - assert "word" in tool_exec_span["attributes"][SPANDATA.GEN_AI_TOOL_INPUT] + assert chat_spans[0]["attributes"]["gen_ai.usage.input_tokens"] == 142 + assert chat_spans[0]["attributes"]["gen_ai.usage.output_tokens"] == 50 + assert chat_spans[0]["attributes"]["gen_ai.usage.total_tokens"] == 192 + assert chat_spans[0]["attributes"]["gen_ai.system"] == "openai-chat" + + assert chat_spans[1]["attributes"]["gen_ai.usage.input_tokens"] == 89 + assert chat_spans[1]["attributes"]["gen_ai.usage.output_tokens"] == 28 + assert chat_spans[1]["attributes"]["gen_ai.usage.total_tokens"] == 117 + assert chat_spans[1]["attributes"]["gen_ai.system"] == "openai-chat" - assert "5" in chat_spans[1]["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] + if send_default_pii and include_prompts: + assert "word" in tool_exec_span["attributes"][SPANDATA.GEN_AI_TOOL_INPUT] - # Verify tool calls are recorded when PII is enabled - assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in chat_spans[0].get( - 
"attributes", {} - ), ( - "Tool calls should be recorded when send_default_pii=True and include_prompts=True" - ) - tool_calls_data = chat_spans[0]["attributes"][ - SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS - ] - assert isinstance(tool_calls_data, str) - assert "get_word_length" in tool_calls_data + assert "5" in chat_spans[1]["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] + + # Verify tool calls are recorded when PII is enabled + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in chat_spans[0].get( + "attributes", {} + ), ( + "Tool calls should be recorded when send_default_pii=True and include_prompts=True" + ) + tool_calls_data = chat_spans[0]["attributes"][ + SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS + ] + assert isinstance(tool_calls_data, str) + assert "get_word_length" in tool_calls_data + else: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[0].get( + "attributes", {} + ) + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[0].get( + "attributes", {} + ) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[1].get( + "attributes", {} + ) + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[1].get( + "attributes", {} + ) + assert SPANDATA.GEN_AI_TOOL_INPUT not in tool_exec_span.get( + "attributes", {} + ) + assert SPANDATA.GEN_AI_TOOL_OUTPUT not in tool_exec_span.get( + "attributes", {} + ) + + # Verify tool calls are NOT recorded when PII is disabled + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in chat_spans[0].get( + "attributes", {} + ), ( + f"Tool calls should NOT be recorded when send_default_pii={send_default_pii} " + f"and include_prompts={include_prompts}" + ) + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in chat_spans[1].get( + "attributes", {} + ), ( + f"Tool calls should NOT be recorded when send_default_pii={send_default_pii} " + f"and include_prompts={include_prompts}" + ) + + # Verify that available tools are always recorded regardless of PII settings + for chat_span in chat_spans: + tools_data = chat_span["attributes"][ + 
SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS + ] + assert "get_word_length" in tools_data else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[0].get( - "attributes", {} - ) - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[0].get("attributes", {}) - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[1].get( - "attributes", {} - ) - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[1].get("attributes", {}) - assert SPANDATA.GEN_AI_TOOL_INPUT not in tool_exec_span.get("attributes", {}) - assert SPANDATA.GEN_AI_TOOL_OUTPUT not in tool_exec_span.get("attributes", {}) + events = capture_events() - # Verify tool calls are NOT recorded when PII is disabled - assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in chat_spans[0].get( - "attributes", {} - ), ( - f"Tool calls should NOT be recorded when send_default_pii={send_default_pii} " - f"and include_prompts={include_prompts}" - ) - assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in chat_spans[1].get( - "attributes", {} - ), ( - f"Tool calls should NOT be recorded when send_default_pii={send_default_pii} " - f"and include_prompts={include_prompts}" + with patch.object( + llm.client._client._client, + "send", + side_effect=[tool_response, final_response], + ) as _, start_transaction(): + agent.invoke( + { + "messages": [ + HumanMessage(content="How many letters in the word eudca"), + ], + }, + ) + + tx = events[0] + + assert tx["type"] == "transaction" + assert tx["contexts"]["trace"]["origin"] == "manual" + + chat_spans = list(x for x in tx["spans"] if x["op"] == "gen_ai.chat") + assert len(chat_spans) == 2 + + tool_exec_spans = list( + x for x in tx["spans"] if x["op"] == "gen_ai.execute_tool" ) - # Verify that available tools are always recorded regardless of PII settings - for chat_span in chat_spans: - tools_data = chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS] - assert "get_word_length" in tools_data + assert len(tool_exec_spans) == 1 + tool_exec_span = tool_exec_spans[0] + + 
assert chat_spans[0]["origin"] == "auto.ai.langchain" + assert chat_spans[1]["origin"] == "auto.ai.langchain" + assert tool_exec_span["origin"] == "auto.ai.langchain" + assert chat_spans[0]["data"]["gen_ai.agent.name"] == "word_length_agent" + assert chat_spans[1]["data"]["gen_ai.agent.name"] == "word_length_agent" + assert tool_exec_span["data"]["gen_ai.agent.name"] == "word_length_agent" + assert chat_spans[0]["data"]["gen_ai.usage.input_tokens"] == 142 + assert chat_spans[0]["data"]["gen_ai.usage.output_tokens"] == 50 + assert chat_spans[0]["data"]["gen_ai.usage.total_tokens"] == 192 + assert chat_spans[0]["data"]["gen_ai.system"] == "openai-chat" + + assert chat_spans[1]["data"]["gen_ai.usage.input_tokens"] == 89 + assert chat_spans[1]["data"]["gen_ai.usage.output_tokens"] == 28 + assert chat_spans[1]["data"]["gen_ai.usage.total_tokens"] == 117 + assert chat_spans[1]["data"]["gen_ai.system"] == "openai-chat" + + if send_default_pii and include_prompts: + assert "word" in tool_exec_span["data"][SPANDATA.GEN_AI_TOOL_INPUT] + + assert "5" in chat_spans[1]["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + + # Verify tool calls are recorded when PII is enabled + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in chat_spans[0].get( + "data", {} + ), ( + "Tool calls should be recorded when send_default_pii=True and include_prompts=True" + ) + tool_calls_data = chat_spans[0]["data"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS] + assert isinstance(tool_calls_data, str) + assert "get_word_length" in tool_calls_data + else: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[0].get("data", {}) + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[0].get("data", {}) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[1].get("data", {}) + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[1].get("data", {}) + assert SPANDATA.GEN_AI_TOOL_INPUT not in tool_exec_span.get("data", {}) + assert SPANDATA.GEN_AI_TOOL_OUTPUT not in tool_exec_span.get("data", {}) + + # Verify tool 
calls are NOT recorded when PII is disabled + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in chat_spans[0].get( + "data", {} + ), ( + f"Tool calls should NOT be recorded when send_default_pii={send_default_pii} " + f"and include_prompts={include_prompts}" + ) + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in chat_spans[1].get( + "data", {} + ), ( + f"Tool calls should NOT be recorded when send_default_pii={send_default_pii} " + f"and include_prompts={include_prompts}" + ) + + # Verify that available tools are always recorded regardless of PII settings + for chat_span in chat_spans: + tools_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS] + assert "get_word_length" in tools_data + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( "send_default_pii, include_prompts", [ @@ -761,12 +1039,14 @@ def test_tool_execution_span( ) def test_langchain_openai_tools_agent_no_prompts( sentry_init, + capture_events, capture_items, send_default_pii, include_prompts, get_model_response, server_side_event_chunks, streaming_chat_completions_model_responses, + stream_gen_ai_spans, ): sentry_init( integrations=[ @@ -776,8 +1056,8 @@ def test_langchain_openai_tools_agent_no_prompts( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") prompt = ChatPromptTemplate.from_messages( [ @@ -815,12 +1095,14 @@ def test_langchain_openai_tools_agent_no_prompts( agent_executor = AgentExecutor(agent=agent, tools=[get_word_length], verbose=True) - with patch.object( - llm.client._client._client, - "send", - side_effect=[tool_response, final_response], - ) as _: - with start_transaction(): + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with patch.object( + llm.client._client._client, + "send", + side_effect=[tool_response, final_response], + ) as _, start_transaction(): list( 
agent_executor.invoke( {"input": "How many letters in the word eudca"}, @@ -828,89 +1110,193 @@ def test_langchain_openai_tools_agent_no_prompts( ) ) - tx = next(item.payload for item in items if item.type == "transaction") - assert tx["type"] == "transaction" - assert tx["contexts"]["trace"]["origin"] == "manual" + tx = next(item.payload for item in items if item.type == "transaction") - spans = [item.payload for item in items if item.type == "span"] - invoke_agent_span = next( - x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.invoke_agent" - ) - chat_spans = list(x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.chat") - tool_exec_span = next( - x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.execute_tool" - ) + assert tx["type"] == "transaction" + assert tx["contexts"]["trace"]["origin"] == "manual" + + spans = [item.payload for item in items if item.type == "span"] + invoke_agent_span = next( + x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.invoke_agent" + ) + chat_spans = list( + x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.chat" + ) + tool_exec_span = next( + x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.execute_tool" + ) - assert len(chat_spans) == 2 + assert len(chat_spans) == 2 - assert invoke_agent_span["attributes"]["sentry.origin"] == "auto.ai.langchain" - assert chat_spans[0]["attributes"]["sentry.origin"] == "auto.ai.langchain" - assert chat_spans[1]["attributes"]["sentry.origin"] == "auto.ai.langchain" - assert tool_exec_span["attributes"]["sentry.origin"] == "auto.ai.langchain" + assert invoke_agent_span["attributes"]["sentry.origin"] == "auto.ai.langchain" + assert chat_spans[0]["attributes"]["sentry.origin"] == "auto.ai.langchain" + assert chat_spans[1]["attributes"]["sentry.origin"] == "auto.ai.langchain" + assert tool_exec_span["attributes"]["sentry.origin"] == "auto.ai.langchain" - assert invoke_agent_span["attributes"]["gen_ai.function_id"] == "my-snazzy-pipeline" + 
assert ( + invoke_agent_span["attributes"]["gen_ai.function_id"] + == "my-snazzy-pipeline" + ) - # We can't guarantee anything about the "shape" of the langchain execution graph - assert ( - len(list(x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.chat")) > 0 - ) + # We can't guarantee anything about the "shape" of the langchain execution graph + assert ( + len(list(x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.chat")) + > 0 + ) - # Token usage is only available in newer versions of langchain (v0.2+) - # where usage_metadata is supported on AIMessageChunk - if "gen_ai.usage.input_tokens" in chat_spans[0]["attributes"]: - assert chat_spans[0]["attributes"]["gen_ai.usage.input_tokens"] == 142 - assert chat_spans[0]["attributes"]["gen_ai.usage.output_tokens"] == 50 - assert chat_spans[0]["attributes"]["gen_ai.usage.total_tokens"] == 192 + # Token usage is only available in newer versions of langchain (v0.2+) + # where usage_metadata is supported on AIMessageChunk + if "gen_ai.usage.input_tokens" in chat_spans[0]["attributes"]: + assert chat_spans[0]["attributes"]["gen_ai.usage.input_tokens"] == 142 + assert chat_spans[0]["attributes"]["gen_ai.usage.output_tokens"] == 50 + assert chat_spans[0]["attributes"]["gen_ai.usage.total_tokens"] == 192 - if "gen_ai.usage.input_tokens" in chat_spans[1]["attributes"]: - assert chat_spans[1]["attributes"]["gen_ai.usage.input_tokens"] == 89 - assert chat_spans[1]["attributes"]["gen_ai.usage.output_tokens"] == 28 - assert chat_spans[1]["attributes"]["gen_ai.usage.total_tokens"] == 117 + if "gen_ai.usage.input_tokens" in chat_spans[1]["attributes"]: + assert chat_spans[1]["attributes"]["gen_ai.usage.input_tokens"] == 89 + assert chat_spans[1]["attributes"]["gen_ai.usage.output_tokens"] == 28 + assert chat_spans[1]["attributes"]["gen_ai.usage.total_tokens"] == 117 - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_spans[0].get( - "attributes", {} - ) - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in 
chat_spans[0].get("attributes", {}) - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[0].get("attributes", {}) - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_spans[1].get( - "attributes", {} - ) - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[1].get("attributes", {}) - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[1].get("attributes", {}) - assert SPANDATA.GEN_AI_TOOL_INPUT not in tool_exec_span.get("attributes", {}) - assert SPANDATA.GEN_AI_TOOL_OUTPUT not in tool_exec_span.get("attributes", {}) + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_spans[0].get( + "attributes", {} + ) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[0].get( + "attributes", {} + ) + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[0].get("attributes", {}) + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_spans[1].get( + "attributes", {} + ) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[1].get( + "attributes", {} + ) + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[1].get("attributes", {}) + assert SPANDATA.GEN_AI_TOOL_INPUT not in tool_exec_span.get("attributes", {}) + assert SPANDATA.GEN_AI_TOOL_OUTPUT not in tool_exec_span.get("attributes", {}) - # Verify tool calls are NOT recorded when PII is disabled - assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in chat_spans[0].get( - "attributes", {} - ), ( - f"Tool calls should NOT be recorded when send_default_pii={send_default_pii} " - f"and include_prompts={include_prompts}" - ) - assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in chat_spans[1].get( - "attributes", {} - ), ( - f"Tool calls should NOT be recorded when send_default_pii={send_default_pii} " - f"and include_prompts={include_prompts}" - ) + # Verify tool calls are NOT recorded when PII is disabled + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in chat_spans[0].get( + "attributes", {} + ), ( + f"Tool calls should NOT be recorded when send_default_pii={send_default_pii} " + f"and 
include_prompts={include_prompts}" + ) + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in chat_spans[1].get( + "attributes", {} + ), ( + f"Tool calls should NOT be recorded when send_default_pii={send_default_pii} " + f"and include_prompts={include_prompts}" + ) - # Verify finish_reasons is always an array of strings - assert chat_spans[0]["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == [ - "function_call" - ] - assert chat_spans[1]["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == [ - "stop" - ] + # Verify finish_reasons is always an array of strings + assert chat_spans[0]["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == [ + "function_call" + ] + assert chat_spans[1]["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == [ + "stop" + ] + + # Verify that available tools are always recorded regardless of PII settings + for chat_span in chat_spans: + tools_data = chat_span["attributes"][ + SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS + ] + assert tools_data is not None, ( + "Available tools should always be recorded regardless of PII settings" + ) + assert "get_word_length" in tools_data + else: + events = capture_events() + + with patch.object( + llm.client._client._client, + "send", + side_effect=[tool_response, final_response], + ) as _, start_transaction(): + list( + agent_executor.invoke( + {"input": "How many letters in the word eudca"}, + {"run_name": "my-snazzy-pipeline"}, + ) + ) + + tx = events[0] - # Verify that available tools are always recorded regardless of PII settings - for chat_span in chat_spans: - tools_data = chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS] - assert tools_data is not None, ( - "Available tools should always be recorded regardless of PII settings" + assert tx["type"] == "transaction" + assert tx["contexts"]["trace"]["origin"] == "manual" + + invoke_agent_span = next( + x for x in tx["spans"] if x["op"] == "gen_ai.invoke_agent" + ) + chat_spans = list(x for x in tx["spans"] if x["op"] == 
"gen_ai.chat") + tool_exec_span = next( + x for x in tx["spans"] if x["op"] == "gen_ai.execute_tool" ) - assert "get_word_length" in tools_data + assert len(chat_spans) == 2 + assert invoke_agent_span["origin"] == "auto.ai.langchain" + assert chat_spans[0]["origin"] == "auto.ai.langchain" + assert chat_spans[1]["origin"] == "auto.ai.langchain" + assert tool_exec_span["origin"] == "auto.ai.langchain" + + assert invoke_agent_span["data"]["gen_ai.function_id"] == "my-snazzy-pipeline" + + # We can't guarantee anything about the "shape" of the langchain execution graph + assert len(list(x for x in tx["spans"] if x["op"] == "gen_ai.chat")) > 0 + + # Token usage is only available in newer versions of langchain (v0.2+) + # where usage_metadata is supported on AIMessageChunk + if "gen_ai.usage.input_tokens" in chat_spans[0]["data"]: + assert chat_spans[0]["data"]["gen_ai.usage.input_tokens"] == 142 + assert chat_spans[0]["data"]["gen_ai.usage.output_tokens"] == 50 + assert chat_spans[0]["data"]["gen_ai.usage.total_tokens"] == 192 + + if "gen_ai.usage.input_tokens" in chat_spans[1]["data"]: + assert chat_spans[1]["data"]["gen_ai.usage.input_tokens"] == 89 + assert chat_spans[1]["data"]["gen_ai.usage.output_tokens"] == 28 + assert chat_spans[1]["data"]["gen_ai.usage.total_tokens"] == 117 + + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_spans[0].get("data", {}) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[0].get("data", {}) + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[0].get("data", {}) + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_spans[1].get("data", {}) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[1].get("data", {}) + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[1].get("data", {}) + assert SPANDATA.GEN_AI_TOOL_INPUT not in tool_exec_span.get("data", {}) + assert SPANDATA.GEN_AI_TOOL_OUTPUT not in tool_exec_span.get("data", {}) + + # Verify tool calls are NOT recorded when PII is disabled + assert 
SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in chat_spans[0].get( + "data", {} + ), ( + f"Tool calls should NOT be recorded when send_default_pii={send_default_pii} " + f"and include_prompts={include_prompts}" + ) + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in chat_spans[1].get( + "data", {} + ), ( + f"Tool calls should NOT be recorded when send_default_pii={send_default_pii} " + f"and include_prompts={include_prompts}" + ) + + # Verify finish_reasons is always an array of strings + assert chat_spans[0]["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == [ + "function_call" + ] + assert chat_spans[1]["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == [ + "stop" + ] + + # Verify that available tools are always recorded regardless of PII settings + for chat_span in chat_spans: + tools_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS] + assert tools_data is not None, ( + "Available tools should always be recorded regardless of PII settings" + ) + assert "get_word_length" in tools_data + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( "system_instructions_content", [ @@ -925,12 +1311,14 @@ def test_langchain_openai_tools_agent_no_prompts( ) def test_langchain_openai_tools_agent( sentry_init, + capture_events, capture_items, system_instructions_content, request, get_model_response, server_side_event_chunks, streaming_chat_completions_model_responses, + stream_gen_ai_spans, ): sentry_init( integrations=[ @@ -940,8 +1328,8 @@ def test_langchain_openai_tools_agent( ], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") prompt = ChatPromptTemplate.from_messages( [ @@ -979,117 +1367,235 @@ def test_langchain_openai_tools_agent( agent_executor = AgentExecutor(agent=agent, tools=[get_word_length], verbose=True) - with patch.object( - llm.client._client._client, - "send", - side_effect=[tool_response, 
final_response], - ) as _: - with start_transaction(): + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with patch.object( + llm.client._client._client, + "send", + side_effect=[tool_response, final_response], + ) as _, start_transaction(): list(agent_executor.stream({"input": "How many letters in the word eudca"})) - tx = next(item.payload for item in items if item.type == "transaction") - assert tx["type"] == "transaction" - assert tx["contexts"]["trace"]["origin"] == "manual" + tx = next(item.payload for item in items if item.type == "transaction") - spans = [item.payload for item in items if item.type == "span"] - invoke_agent_span = next( - x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.invoke_agent" - ) - chat_spans = list(x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.chat") - tool_exec_span = next( - x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.execute_tool" - ) + assert tx["type"] == "transaction" + assert tx["contexts"]["trace"]["origin"] == "manual" - assert len(chat_spans) == 2 + spans = [item.payload for item in items if item.type == "span"] + invoke_agent_span = next( + x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.invoke_agent" + ) + chat_spans = list( + x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.chat" + ) + tool_exec_span = next( + x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.execute_tool" + ) - assert invoke_agent_span["attributes"]["sentry.origin"] == "auto.ai.langchain" - assert chat_spans[0]["attributes"]["sentry.origin"] == "auto.ai.langchain" - assert chat_spans[1]["attributes"]["sentry.origin"] == "auto.ai.langchain" - assert tool_exec_span["attributes"]["sentry.origin"] == "auto.ai.langchain" + assert len(chat_spans) == 2 - # We can't guarantee anything about the "shape" of the langchain execution graph - assert ( - len(list(x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.chat")) > 0 - ) + assert 
invoke_agent_span["attributes"]["sentry.origin"] == "auto.ai.langchain" + assert chat_spans[0]["attributes"]["sentry.origin"] == "auto.ai.langchain" + assert chat_spans[1]["attributes"]["sentry.origin"] == "auto.ai.langchain" + assert tool_exec_span["attributes"]["sentry.origin"] == "auto.ai.langchain" - # Token usage is only available in newer versions of langchain (v0.2+) - # where usage_metadata is supported on AIMessageChunk - if "gen_ai.usage.input_tokens" in chat_spans[0]["attributes"]: - assert chat_spans[0]["attributes"]["gen_ai.usage.input_tokens"] == 142 - assert chat_spans[0]["attributes"]["gen_ai.usage.output_tokens"] == 50 - assert chat_spans[0]["attributes"]["gen_ai.usage.total_tokens"] == 192 + # We can't guarantee anything about the "shape" of the langchain execution graph + assert ( + len(list(x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.chat")) + > 0 + ) - if "gen_ai.usage.input_tokens" in chat_spans[1]["attributes"]: - assert chat_spans[1]["attributes"]["gen_ai.usage.input_tokens"] == 89 - assert chat_spans[1]["attributes"]["gen_ai.usage.output_tokens"] == 28 - assert chat_spans[1]["attributes"]["gen_ai.usage.total_tokens"] == 117 + # Token usage is only available in newer versions of langchain (v0.2+) + # where usage_metadata is supported on AIMessageChunk + if "gen_ai.usage.input_tokens" in chat_spans[0]["attributes"]: + assert chat_spans[0]["attributes"]["gen_ai.usage.input_tokens"] == 142 + assert chat_spans[0]["attributes"]["gen_ai.usage.output_tokens"] == 50 + assert chat_spans[0]["attributes"]["gen_ai.usage.total_tokens"] == 192 - assert "5" in chat_spans[0]["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] - assert "word" in tool_exec_span["attributes"][SPANDATA.GEN_AI_TOOL_INPUT] - assert 5 == int(tool_exec_span["attributes"][SPANDATA.GEN_AI_TOOL_OUTPUT]) + if "gen_ai.usage.input_tokens" in chat_spans[1]["attributes"]: + assert chat_spans[1]["attributes"]["gen_ai.usage.input_tokens"] == 89 + assert 
chat_spans[1]["attributes"]["gen_ai.usage.output_tokens"] == 28 + assert chat_spans[1]["attributes"]["gen_ai.usage.total_tokens"] == 117 - param_id = request.node.callspec.id - if "string" in param_id: - assert [ - { - "type": "text", - "content": "You are very powerful assistant, but don't know current events", - } - ] == json.loads( - chat_spans[0]["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] - ) + assert "5" in chat_spans[0]["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] + assert "word" in tool_exec_span["attributes"][SPANDATA.GEN_AI_TOOL_INPUT] + assert 5 == int(tool_exec_span["attributes"][SPANDATA.GEN_AI_TOOL_OUTPUT]) + + param_id = request.node.callspec.id + if "string" in param_id: + assert [ + { + "type": "text", + "content": "You are very powerful assistant, but don't know current events", + } + ] == json.loads( + chat_spans[0]["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + ) + else: + assert [ + { + "type": "text", + "content": "You are a helpful assistant.", + }, + { + "type": "text", + "content": "Be concise and clear.", + }, + ] == json.loads( + chat_spans[0]["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + ) + + assert "5" in chat_spans[1]["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] + + # Verify tool calls are recorded when PII is enabled + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in chat_spans[0].get( + "attributes", {} + ), ( + "Tool calls should be recorded when send_default_pii=True and include_prompts=True" + ) + tool_calls_data = chat_spans[0]["attributes"][ + SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS + ] + + assert isinstance(tool_calls_data, (list, str)) # Could be serialized + if isinstance(tool_calls_data, str): + assert "get_word_length" in tool_calls_data + elif isinstance(tool_calls_data, list) and len(tool_calls_data) > 0: + # Check if tool calls contain expected function name + tool_call_str = str(tool_calls_data) + assert "get_word_length" in tool_call_str + + # Verify finish_reasons is always an array of strings + assert 
chat_spans[0]["attributes"][ + SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS + ] == ["function_call"] + assert chat_spans[1]["attributes"][ + SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS + ] == ["stop"] + + # Verify that available tools are always recorded regardless of PII settings + for chat_span in chat_spans: + tools_data = chat_span["attributes"][ + SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS + ] + assert tools_data is not None, ( + "Available tools should always be recorded regardless of PII settings" + ) + assert "get_word_length" in tools_data else: - assert [ - { - "type": "text", - "content": "You are a helpful assistant.", - }, - { - "type": "text", - "content": "Be concise and clear.", - }, - ] == json.loads( - chat_spans[0]["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] - ) + events = capture_events() - assert "5" in chat_spans[1]["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] + with patch.object( + llm.client._client._client, + "send", + side_effect=[tool_response, final_response], + ) as _, start_transaction(): + list(agent_executor.stream({"input": "How many letters in the word eudca"})) - # Verify tool calls are recorded when PII is enabled - assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in chat_spans[0].get("attributes", {}), ( - "Tool calls should be recorded when send_default_pii=True and include_prompts=True" - ) - tool_calls_data = chat_spans[0]["attributes"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS] - assert isinstance(tool_calls_data, (list, str)) # Could be serialized - if isinstance(tool_calls_data, str): - assert "get_word_length" in tool_calls_data - elif isinstance(tool_calls_data, list) and len(tool_calls_data) > 0: - # Check if tool calls contain expected function name - tool_call_str = str(tool_calls_data) - assert "get_word_length" in tool_call_str + tx = events[0] - # Verify finish_reasons is always an array of strings - assert chat_spans[0]["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == [ - "function_call" - ] - assert 
chat_spans[1]["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == [ - "stop" - ] + assert tx["type"] == "transaction" + assert tx["contexts"]["trace"]["origin"] == "manual" - # Verify that available tools are always recorded regardless of PII settings - for chat_span in chat_spans: - tools_data = chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS] - assert tools_data is not None, ( - "Available tools should always be recorded regardless of PII settings" + invoke_agent_span = next( + x for x in tx["spans"] if x["op"] == "gen_ai.invoke_agent" ) - assert "get_word_length" in tools_data + chat_spans = list(x for x in tx["spans"] if x["op"] == "gen_ai.chat") + tool_exec_span = next( + x for x in tx["spans"] if x["op"] == "gen_ai.execute_tool" + ) + + assert len(chat_spans) == 2 + + assert invoke_agent_span["origin"] == "auto.ai.langchain" + assert chat_spans[0]["origin"] == "auto.ai.langchain" + assert chat_spans[1]["origin"] == "auto.ai.langchain" + assert tool_exec_span["origin"] == "auto.ai.langchain" + + # We can't guarantee anything about the "shape" of the langchain execution graph + assert len(list(x for x in tx["spans"] if x["op"] == "gen_ai.chat")) > 0 + + # Token usage is only available in newer versions of langchain (v0.2+) + # where usage_metadata is supported on AIMessageChunk + if "gen_ai.usage.input_tokens" in chat_spans[0]["data"]: + assert chat_spans[0]["data"]["gen_ai.usage.input_tokens"] == 142 + assert chat_spans[0]["data"]["gen_ai.usage.output_tokens"] == 50 + assert chat_spans[0]["data"]["gen_ai.usage.total_tokens"] == 192 + + if "gen_ai.usage.input_tokens" in chat_spans[1]["data"]: + assert chat_spans[1]["data"]["gen_ai.usage.input_tokens"] == 89 + assert chat_spans[1]["data"]["gen_ai.usage.output_tokens"] == 28 + assert chat_spans[1]["data"]["gen_ai.usage.total_tokens"] == 117 + + assert "5" in chat_spans[0]["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + assert "word" in tool_exec_span["data"][SPANDATA.GEN_AI_TOOL_INPUT] + assert 5 
== int(tool_exec_span["data"][SPANDATA.GEN_AI_TOOL_OUTPUT]) + + param_id = request.node.callspec.id + if "string" in param_id: + assert [ + { + "type": "text", + "content": "You are very powerful assistant, but don't know current events", + } + ] == json.loads(chat_spans[0]["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) + else: + assert [ + { + "type": "text", + "content": "You are a helpful assistant.", + }, + { + "type": "text", + "content": "Be concise and clear.", + }, + ] == json.loads(chat_spans[0]["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) + + assert "5" in chat_spans[1]["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + + # Verify tool calls are recorded when PII is enabled + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in chat_spans[0].get( + "data", {} + ), ( + "Tool calls should be recorded when send_default_pii=True and include_prompts=True" + ) + tool_calls_data = chat_spans[0]["data"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS] + + assert isinstance(tool_calls_data, (list, str)) # Could be serialized + if isinstance(tool_calls_data, str): + assert "get_word_length" in tool_calls_data + elif isinstance(tool_calls_data, list) and len(tool_calls_data) > 0: + # Check if tool calls contain expected function name + tool_call_str = str(tool_calls_data) + assert "get_word_length" in tool_call_str + + # Verify finish_reasons is always an array of strings + assert chat_spans[0]["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == [ + "function_call" + ] + assert chat_spans[1]["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == [ + "stop" + ] + + # Verify that available tools are always recorded regardless of PII settings + for chat_span in chat_spans: + tools_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS] + assert tools_data is not None, ( + "Available tools should always be recorded regardless of PII settings" + ) + assert "get_word_length" in tools_data +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def 
test_langchain_openai_tools_agent_with_config( sentry_init, + capture_events, capture_items, get_model_response, server_side_event_chunks, streaming_chat_completions_model_responses, + stream_gen_ai_spans, ): sentry_init( integrations=[ @@ -1099,8 +1605,8 @@ def test_langchain_openai_tools_agent_with_config( ], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") prompt = ChatPromptTemplate.from_messages( [ @@ -1140,29 +1646,59 @@ def test_langchain_openai_tools_agent_with_config( agent_executor = AgentExecutor(agent=agent, tools=[get_word_length], verbose=True) - with patch.object( - llm.client._client._client, - "send", - side_effect=[tool_response, final_response], - ) as _: - with start_transaction(): + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with patch.object( + llm.client._client._client, + "send", + side_effect=[tool_response, final_response], + ) as _, start_transaction(): list( agent_executor.invoke( {"input": "How many letters in the word eudca"}, ) ) - tx = next(item.payload for item in items if item.type == "transaction") - assert tx["type"] == "transaction" - assert tx["contexts"]["trace"]["origin"] == "manual" + tx = next(item.payload for item in items if item.type == "transaction") - spans = [item.payload for item in items if item.type == "span"] - invoke_agent_span = next( - x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.invoke_agent" - ) - assert invoke_agent_span["attributes"]["gen_ai.function_id"] == "my-snazzy-pipeline" + assert tx["type"] == "transaction" + assert tx["contexts"]["trace"]["origin"] == "manual" + spans = [item.payload for item in items if item.type == "span"] + invoke_agent_span = next( + x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.invoke_agent" + ) + assert ( + invoke_agent_span["attributes"]["gen_ai.function_id"] + == "my-snazzy-pipeline" + ) + else: + events = 
capture_events() + with patch.object( + llm.client._client._client, + "send", + side_effect=[tool_response, final_response], + ) as _, start_transaction(): + list( + agent_executor.invoke( + {"input": "How many letters in the word eudca"}, + ) + ) + + tx = events[0] + + assert tx["type"] == "transaction" + assert tx["contexts"]["trace"]["origin"] == "manual" + + invoke_agent_span = next( + x for x in tx["spans"] if x["op"] == "gen_ai.invoke_agent" + ) + assert invoke_agent_span["data"]["gen_ai.function_id"] == "my-snazzy-pipeline" + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( "send_default_pii, include_prompts", [ @@ -1173,12 +1709,14 @@ def test_langchain_openai_tools_agent_with_config( ) def test_langchain_openai_tools_agent_stream_no_prompts( sentry_init, + capture_events, capture_items, send_default_pii, include_prompts, get_model_response, server_side_event_chunks, streaming_chat_completions_model_responses, + stream_gen_ai_spans, ): sentry_init( integrations=[ @@ -1188,8 +1726,8 @@ def test_langchain_openai_tools_agent_stream_no_prompts( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") prompt = ChatPromptTemplate.from_messages( [ @@ -1227,12 +1765,14 @@ def test_langchain_openai_tools_agent_stream_no_prompts( agent_executor = AgentExecutor(agent=agent, tools=[get_word_length], verbose=True) - with patch.object( - llm.client._client._client, - "send", - side_effect=[tool_response, final_response], - ) as _: - with start_transaction(): + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with patch.object( + llm.client._client._client, + "send", + side_effect=[tool_response, final_response], + ) as _, start_transaction(): list( agent_executor.stream( {"input": "How many letters in the word eudca"}, @@ -1240,90 +1780,195 @@ def 
test_langchain_openai_tools_agent_stream_no_prompts( ) ) - tx = next(item.payload for item in items if item.type == "transaction") - assert tx["type"] == "transaction" - assert tx["contexts"]["trace"]["origin"] == "manual" + tx = next(item.payload for item in items if item.type == "transaction") - spans = [item.payload for item in items if item.type == "span"] - invoke_agent_span = next( - x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.invoke_agent" - ) - chat_spans = list(x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.chat") - tool_exec_span = next( - x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.execute_tool" - ) + assert tx["type"] == "transaction" + assert tx["contexts"]["trace"]["origin"] == "manual" - assert len(chat_spans) == 2 + spans = [item.payload for item in items if item.type == "span"] + invoke_agent_span = next( + x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.invoke_agent" + ) + chat_spans = list( + x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.chat" + ) + tool_exec_span = next( + x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.execute_tool" + ) - assert invoke_agent_span["attributes"]["sentry.origin"] == "auto.ai.langchain" - assert chat_spans[0]["attributes"]["sentry.origin"] == "auto.ai.langchain" - assert chat_spans[1]["attributes"]["sentry.origin"] == "auto.ai.langchain" - assert tool_exec_span["attributes"]["sentry.origin"] == "auto.ai.langchain" + assert len(chat_spans) == 2 - assert invoke_agent_span["attributes"]["gen_ai.function_id"] == "my-snazzy-pipeline" + assert invoke_agent_span["attributes"]["sentry.origin"] == "auto.ai.langchain" + assert chat_spans[0]["attributes"]["sentry.origin"] == "auto.ai.langchain" + assert chat_spans[1]["attributes"]["sentry.origin"] == "auto.ai.langchain" + assert tool_exec_span["attributes"]["sentry.origin"] == "auto.ai.langchain" - spans = [item.payload for item in items if item.type == "span"] - # We can't guarantee anything 
about the "shape" of the langchain execution graph - assert ( - len(list(x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.chat")) > 0 - ) + assert ( + invoke_agent_span["attributes"]["gen_ai.function_id"] + == "my-snazzy-pipeline" + ) - # Token usage is only available in newer versions of langchain (v0.2+) - # where usage_metadata is supported on AIMessageChunk - if "gen_ai.usage.input_tokens" in chat_spans[0]["attributes"]: - assert chat_spans[0]["attributes"]["gen_ai.usage.input_tokens"] == 142 - assert chat_spans[0]["attributes"]["gen_ai.usage.output_tokens"] == 50 - assert chat_spans[0]["attributes"]["gen_ai.usage.total_tokens"] == 192 + spans = [item.payload for item in items if item.type == "span"] + # We can't guarantee anything about the "shape" of the langchain execution graph + assert ( + len(list(x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.chat")) + > 0 + ) - if "gen_ai.usage.input_tokens" in chat_spans[1]["attributes"]: - assert chat_spans[1]["attributes"]["gen_ai.usage.input_tokens"] == 89 - assert chat_spans[1]["attributes"]["gen_ai.usage.output_tokens"] == 28 - assert chat_spans[1]["attributes"]["gen_ai.usage.total_tokens"] == 117 + # Token usage is only available in newer versions of langchain (v0.2+) + # where usage_metadata is supported on AIMessageChunk + if "gen_ai.usage.input_tokens" in chat_spans[0]["attributes"]: + assert chat_spans[0]["attributes"]["gen_ai.usage.input_tokens"] == 142 + assert chat_spans[0]["attributes"]["gen_ai.usage.output_tokens"] == 50 + assert chat_spans[0]["attributes"]["gen_ai.usage.total_tokens"] == 192 - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_spans[0].get( - "attributes", {} - ) - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[0].get("attributes", {}) - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[0].get("attributes", {}) - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_spans[1].get( - "attributes", {} - ) - assert 
SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[1].get("attributes", {}) - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[1].get("attributes", {}) - assert SPANDATA.GEN_AI_TOOL_INPUT not in tool_exec_span.get("attributes", {}) - assert SPANDATA.GEN_AI_TOOL_OUTPUT not in tool_exec_span.get("attributes", {}) + if "gen_ai.usage.input_tokens" in chat_spans[1]["attributes"]: + assert chat_spans[1]["attributes"]["gen_ai.usage.input_tokens"] == 89 + assert chat_spans[1]["attributes"]["gen_ai.usage.output_tokens"] == 28 + assert chat_spans[1]["attributes"]["gen_ai.usage.total_tokens"] == 117 - # Verify tool calls are NOT recorded when PII is disabled - assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in chat_spans[0].get( - "attributes", {} - ), ( - f"Tool calls should NOT be recorded when send_default_pii={send_default_pii} " - f"and include_prompts={include_prompts}" - ) - assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in chat_spans[1].get( - "attributes", {} - ), ( - f"Tool calls should NOT be recorded when send_default_pii={send_default_pii} " - f"and include_prompts={include_prompts}" - ) + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_spans[0].get( + "attributes", {} + ) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[0].get( + "attributes", {} + ) + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[0].get("attributes", {}) + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_spans[1].get( + "attributes", {} + ) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[1].get( + "attributes", {} + ) + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[1].get("attributes", {}) + assert SPANDATA.GEN_AI_TOOL_INPUT not in tool_exec_span.get("attributes", {}) + assert SPANDATA.GEN_AI_TOOL_OUTPUT not in tool_exec_span.get("attributes", {}) - # Verify finish_reasons is always an array of strings - assert chat_spans[0]["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == [ - "function_call" - ] - assert 
chat_spans[1]["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == [ - "stop" - ] + # Verify tool calls are NOT recorded when PII is disabled + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in chat_spans[0].get( + "attributes", {} + ), ( + f"Tool calls should NOT be recorded when send_default_pii={send_default_pii} " + f"and include_prompts={include_prompts}" + ) + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in chat_spans[1].get( + "attributes", {} + ), ( + f"Tool calls should NOT be recorded when send_default_pii={send_default_pii} " + f"and include_prompts={include_prompts}" + ) - # Verify that available tools are always recorded regardless of PII settings - for chat_span in chat_spans: - tools_data = chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS] - assert tools_data is not None, ( - "Available tools should always be recorded regardless of PII settings" + # Verify finish_reasons is always an array of strings + assert chat_spans[0]["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == [ + "function_call" + ] + assert chat_spans[1]["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == [ + "stop" + ] + + # Verify that available tools are always recorded regardless of PII settings + for chat_span in chat_spans: + tools_data = chat_span["attributes"][ + SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS + ] + + assert tools_data is not None, ( + "Available tools should always be recorded regardless of PII settings" + ) + assert "get_word_length" in tools_data + else: + events = capture_events() + + with patch.object( + llm.client._client._client, + "send", + side_effect=[tool_response, final_response], + ) as _, start_transaction(): + list( + agent_executor.stream( + {"input": "How many letters in the word eudca"}, + {"run_name": "my-snazzy-pipeline"}, + ) + ) + + tx = events[0] + + assert tx["type"] == "transaction" + assert tx["contexts"]["trace"]["origin"] == "manual" + + invoke_agent_span = next( + x for x in tx["spans"] if x["op"] == 
"gen_ai.invoke_agent" ) - assert "get_word_length" in tools_data + chat_spans = list(x for x in tx["spans"] if x["op"] == "gen_ai.chat") + tool_exec_span = next( + x for x in tx["spans"] if x["op"] == "gen_ai.execute_tool" + ) + + assert len(chat_spans) == 2 + + assert invoke_agent_span["origin"] == "auto.ai.langchain" + assert chat_spans[0]["origin"] == "auto.ai.langchain" + assert chat_spans[1]["origin"] == "auto.ai.langchain" + assert tool_exec_span["origin"] == "auto.ai.langchain" + + assert invoke_agent_span["data"]["gen_ai.function_id"] == "my-snazzy-pipeline" + # We can't guarantee anything about the "shape" of the langchain execution graph + assert len(list(x for x in tx["spans"] if x["op"] == "gen_ai.chat")) > 0 + + # Token usage is only available in newer versions of langchain (v0.2+) + # where usage_metadata is supported on AIMessageChunk + if "gen_ai.usage.input_tokens" in chat_spans[0]["data"]: + assert chat_spans[0]["data"]["gen_ai.usage.input_tokens"] == 142 + assert chat_spans[0]["data"]["gen_ai.usage.output_tokens"] == 50 + assert chat_spans[0]["data"]["gen_ai.usage.total_tokens"] == 192 + + if "gen_ai.usage.input_tokens" in chat_spans[1]["data"]: + assert chat_spans[1]["data"]["gen_ai.usage.input_tokens"] == 89 + assert chat_spans[1]["data"]["gen_ai.usage.output_tokens"] == 28 + assert chat_spans[1]["data"]["gen_ai.usage.total_tokens"] == 117 + + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_spans[0].get("data", {}) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[0].get("data", {}) + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[0].get("data", {}) + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_spans[1].get("data", {}) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[1].get("data", {}) + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[1].get("data", {}) + assert SPANDATA.GEN_AI_TOOL_INPUT not in tool_exec_span.get("data", {}) + assert SPANDATA.GEN_AI_TOOL_OUTPUT not in 
tool_exec_span.get("data", {}) + + # Verify tool calls are NOT recorded when PII is disabled + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in chat_spans[0].get( + "data", {} + ), ( + f"Tool calls should NOT be recorded when send_default_pii={send_default_pii} " + f"and include_prompts={include_prompts}" + ) + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in chat_spans[1].get( + "data", {} + ), ( + f"Tool calls should NOT be recorded when send_default_pii={send_default_pii} " + f"and include_prompts={include_prompts}" + ) + + # Verify finish_reasons is always an array of strings + assert chat_spans[0]["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == [ + "function_call" + ] + assert chat_spans[1]["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == [ + "stop" + ] + # Verify that available tools are always recorded regardless of PII settings + for chat_span in chat_spans: + tools_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS] + assert tools_data is not None, ( + "Available tools should always be recorded regardless of PII settings" + ) + assert "get_word_length" in tools_data + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( "system_instructions_content", [ @@ -1338,12 +1983,14 @@ def test_langchain_openai_tools_agent_stream_no_prompts( ) def test_langchain_openai_tools_agent_stream( sentry_init, + capture_events, capture_items, system_instructions_content, request, get_model_response, server_side_event_chunks, streaming_chat_completions_model_responses, + stream_gen_ai_spans, ): sentry_init( integrations=[ @@ -1353,8 +2000,8 @@ def test_langchain_openai_tools_agent_stream( ], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") prompt = ChatPromptTemplate.from_messages( [ @@ -1392,12 +2039,14 @@ def test_langchain_openai_tools_agent_stream( agent_executor = AgentExecutor(agent=agent, 
tools=[get_word_length], verbose=True) - with patch.object( - llm.client._client._client, - "send", - side_effect=[tool_response, final_response], - ) as _: - with start_transaction(): + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with patch.object( + llm.client._client._client, + "send", + side_effect=[tool_response, final_response], + ) as _, start_transaction(): list( agent_executor.stream( {"input": "How many letters in the word eudca"}, @@ -1405,111 +2054,236 @@ def test_langchain_openai_tools_agent_stream( ) ) - tx = next(item.payload for item in items if item.type == "transaction") - assert tx["type"] == "transaction" - assert tx["contexts"]["trace"]["origin"] == "manual" + tx = next(item.payload for item in items if item.type == "transaction") - spans = [item.payload for item in items if item.type == "span"] - invoke_agent_span = next( - x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.invoke_agent" - ) - chat_spans = list(x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.chat") - tool_exec_span = next( - x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.execute_tool" - ) + assert tx["type"] == "transaction" + assert tx["contexts"]["trace"]["origin"] == "manual" - assert len(chat_spans) == 2 + spans = [item.payload for item in items if item.type == "span"] + invoke_agent_span = next( + x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.invoke_agent" + ) + chat_spans = list( + x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.chat" + ) + tool_exec_span = next( + x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.execute_tool" + ) - assert invoke_agent_span["attributes"]["sentry.origin"] == "auto.ai.langchain" - assert chat_spans[0]["attributes"]["sentry.origin"] == "auto.ai.langchain" - assert chat_spans[1]["attributes"]["sentry.origin"] == "auto.ai.langchain" - assert tool_exec_span["attributes"]["sentry.origin"] == "auto.ai.langchain" + assert len(chat_spans) == 2 - 
assert invoke_agent_span["attributes"]["gen_ai.function_id"] == "my-snazzy-pipeline" + assert invoke_agent_span["attributes"]["sentry.origin"] == "auto.ai.langchain" + assert chat_spans[0]["attributes"]["sentry.origin"] == "auto.ai.langchain" + assert chat_spans[1]["attributes"]["sentry.origin"] == "auto.ai.langchain" + assert tool_exec_span["attributes"]["sentry.origin"] == "auto.ai.langchain" - # We can't guarantee anything about the "shape" of the langchain execution graph - assert ( - len(list(x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.chat")) > 0 - ) + assert ( + invoke_agent_span["attributes"]["gen_ai.function_id"] + == "my-snazzy-pipeline" + ) - # Token usage is only available in newer versions of langchain (v0.2+) - # where usage_metadata is supported on AIMessageChunk - if "gen_ai.usage.input_tokens" in chat_spans[0]["attributes"]: - assert chat_spans[0]["attributes"]["gen_ai.usage.input_tokens"] == 142 - assert chat_spans[0]["attributes"]["gen_ai.usage.output_tokens"] == 50 - assert chat_spans[0]["attributes"]["gen_ai.usage.total_tokens"] == 192 + # We can't guarantee anything about the "shape" of the langchain execution graph + assert ( + len(list(x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.chat")) + > 0 + ) - if "gen_ai.usage.input_tokens" in chat_spans[1]["attributes"]: - assert chat_spans[1]["attributes"]["gen_ai.usage.input_tokens"] == 89 - assert chat_spans[1]["attributes"]["gen_ai.usage.output_tokens"] == 28 - assert chat_spans[1]["attributes"]["gen_ai.usage.total_tokens"] == 117 + # Token usage is only available in newer versions of langchain (v0.2+) + # where usage_metadata is supported on AIMessageChunk + if "gen_ai.usage.input_tokens" in chat_spans[0]["attributes"]: + assert chat_spans[0]["attributes"]["gen_ai.usage.input_tokens"] == 142 + assert chat_spans[0]["attributes"]["gen_ai.usage.output_tokens"] == 50 + assert chat_spans[0]["attributes"]["gen_ai.usage.total_tokens"] == 192 - assert "5" in 
chat_spans[0]["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] - assert "word" in tool_exec_span["attributes"][SPANDATA.GEN_AI_TOOL_INPUT] - assert 5 == int(tool_exec_span["attributes"][SPANDATA.GEN_AI_TOOL_OUTPUT]) + if "gen_ai.usage.input_tokens" in chat_spans[1]["attributes"]: + assert chat_spans[1]["attributes"]["gen_ai.usage.input_tokens"] == 89 + assert chat_spans[1]["attributes"]["gen_ai.usage.output_tokens"] == 28 + assert chat_spans[1]["attributes"]["gen_ai.usage.total_tokens"] == 117 - param_id = request.node.callspec.id - if "string" in param_id: - assert [ - { - "type": "text", - "content": "You are very powerful assistant, but don't know current events", - } - ] == json.loads( - chat_spans[0]["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] - ) + assert "5" in chat_spans[0]["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] + assert "word" in tool_exec_span["attributes"][SPANDATA.GEN_AI_TOOL_INPUT] + assert 5 == int(tool_exec_span["attributes"][SPANDATA.GEN_AI_TOOL_OUTPUT]) + + param_id = request.node.callspec.id + if "string" in param_id: + assert [ + { + "type": "text", + "content": "You are very powerful assistant, but don't know current events", + } + ] == json.loads( + chat_spans[0]["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + ) + else: + assert [ + { + "type": "text", + "content": "You are a helpful assistant.", + }, + { + "type": "text", + "content": "Be concise and clear.", + }, + ] == json.loads( + chat_spans[0]["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + ) + + assert "5" in chat_spans[1]["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] + + # Verify tool calls are recorded when PII is enabled + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in chat_spans[0].get( + "attributes", {} + ), ( + "Tool calls should be recorded when send_default_pii=True and include_prompts=True" + ) + tool_calls_data = chat_spans[0]["attributes"][ + SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS + ] + + assert isinstance(tool_calls_data, (list, str)) # Could be serialized + if 
isinstance(tool_calls_data, str): + assert "get_word_length" in tool_calls_data + elif isinstance(tool_calls_data, list) and len(tool_calls_data) > 0: + # Check if tool calls contain expected function name + tool_call_str = str(tool_calls_data) + assert "get_word_length" in tool_call_str + + # Verify finish_reasons is always an array of strings + assert chat_spans[0]["attributes"][ + SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS + ] == ["function_call"] + assert chat_spans[1]["attributes"][ + SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS + ] == ["stop"] + + # Verify that available tools are always recorded regardless of PII settings + for chat_span in chat_spans: + tools_data = chat_span["attributes"][ + SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS + ] + assert tools_data is not None, ( + "Available tools should always be recorded regardless of PII settings" + ) + assert "get_word_length" in tools_data else: - assert [ - { - "type": "text", - "content": "You are a helpful assistant.", - }, - { - "type": "text", - "content": "Be concise and clear.", - }, - ] == json.loads( - chat_spans[0]["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + events = capture_events() + + with patch.object( + llm.client._client._client, + "send", + side_effect=[tool_response, final_response], + ) as _, start_transaction(): + list( + agent_executor.stream( + {"input": "How many letters in the word eudca"}, + {"run_name": "my-snazzy-pipeline"}, + ) + ) + + tx = events[0] + + assert tx["type"] == "transaction" + assert tx["contexts"]["trace"]["origin"] == "manual" + + invoke_agent_span = next( + x for x in tx["spans"] if x["op"] == "gen_ai.invoke_agent" + ) + chat_spans = list(x for x in tx["spans"] if x["op"] == "gen_ai.chat") + tool_exec_span = next( + x for x in tx["spans"] if x["op"] == "gen_ai.execute_tool" ) - assert "5" in chat_spans[1]["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] + assert len(chat_spans) == 2 + + assert invoke_agent_span["origin"] == "auto.ai.langchain" + assert 
chat_spans[0]["origin"] == "auto.ai.langchain" + assert chat_spans[1]["origin"] == "auto.ai.langchain" + assert tool_exec_span["origin"] == "auto.ai.langchain" + + assert invoke_agent_span["data"]["gen_ai.function_id"] == "my-snazzy-pipeline" + + # We can't guarantee anything about the "shape" of the langchain execution graph + assert len(list(x for x in tx["spans"] if x["op"] == "gen_ai.chat")) > 0 + + # Token usage is only available in newer versions of langchain (v0.2+) + # where usage_metadata is supported on AIMessageChunk + if "gen_ai.usage.input_tokens" in chat_spans[0]["data"]: + assert chat_spans[0]["data"]["gen_ai.usage.input_tokens"] == 142 + assert chat_spans[0]["data"]["gen_ai.usage.output_tokens"] == 50 + assert chat_spans[0]["data"]["gen_ai.usage.total_tokens"] == 192 - # Verify tool calls are recorded when PII is enabled - assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in chat_spans[0].get("attributes", {}), ( - "Tool calls should be recorded when send_default_pii=True and include_prompts=True" - ) - tool_calls_data = chat_spans[0]["attributes"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS] - assert isinstance(tool_calls_data, (list, str)) # Could be serialized - if isinstance(tool_calls_data, str): - assert "get_word_length" in tool_calls_data - elif isinstance(tool_calls_data, list) and len(tool_calls_data) > 0: - # Check if tool calls contain expected function name - tool_call_str = str(tool_calls_data) - assert "get_word_length" in tool_call_str + if "gen_ai.usage.input_tokens" in chat_spans[1]["data"]: + assert chat_spans[1]["data"]["gen_ai.usage.input_tokens"] == 89 + assert chat_spans[1]["data"]["gen_ai.usage.output_tokens"] == 28 + assert chat_spans[1]["data"]["gen_ai.usage.total_tokens"] == 117 - # Verify finish_reasons is always an array of strings - assert chat_spans[0]["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == [ - "function_call" - ] - assert chat_spans[1]["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == [ - "stop" - ] + 
assert "5" in chat_spans[0]["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + assert "word" in tool_exec_span["data"][SPANDATA.GEN_AI_TOOL_INPUT] + assert 5 == int(tool_exec_span["data"][SPANDATA.GEN_AI_TOOL_OUTPUT]) - # Verify that available tools are always recorded regardless of PII settings - for chat_span in chat_spans: - tools_data = chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS] - assert tools_data is not None, ( - "Available tools should always be recorded regardless of PII settings" - ) - assert "get_word_length" in tools_data + param_id = request.node.callspec.id + if "string" in param_id: + assert [ + { + "type": "text", + "content": "You are very powerful assistant, but don't know current events", + } + ] == json.loads(chat_spans[0]["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) + else: + assert [ + { + "type": "text", + "content": "You are a helpful assistant.", + }, + { + "type": "text", + "content": "Be concise and clear.", + }, + ] == json.loads(chat_spans[0]["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) + + assert "5" in chat_spans[1]["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + + # Verify tool calls are recorded when PII is enabled + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in chat_spans[0].get( + "data", {} + ), ( + "Tool calls should be recorded when send_default_pii=True and include_prompts=True" + ) + tool_calls_data = chat_spans[0]["data"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS] + assert isinstance(tool_calls_data, (list, str)) # Could be serialized + if isinstance(tool_calls_data, str): + assert "get_word_length" in tool_calls_data + elif isinstance(tool_calls_data, list) and len(tool_calls_data) > 0: + # Check if tool calls contain expected function name + tool_call_str = str(tool_calls_data) + assert "get_word_length" in tool_call_str + + # Verify finish_reasons is always an array of strings + assert chat_spans[0]["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == [ + "function_call" + ] + assert 
chat_spans[1]["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == [ + "stop" + ] + + # Verify that available tools are always recorded regardless of PII settings + for chat_span in chat_spans: + tools_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS] + assert tools_data is not None, ( + "Available tools should always be recorded regardless of PII settings" + ) + assert "get_word_length" in tools_data +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_langchain_openai_tools_agent_stream_with_config( sentry_init, + capture_events, capture_items, get_model_response, server_side_event_chunks, streaming_chat_completions_model_responses, + stream_gen_ai_spans, ): sentry_init( integrations=[ @@ -1519,8 +2293,8 @@ def test_langchain_openai_tools_agent_stream_with_config( ], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") prompt = ChatPromptTemplate.from_messages( [ @@ -1560,30 +2334,65 @@ def test_langchain_openai_tools_agent_stream_with_config( agent_executor = AgentExecutor(agent=agent, tools=[get_word_length], verbose=True) - with patch.object( - llm.client._client._client, - "send", - side_effect=[tool_response, final_response], - ) as _: - with start_transaction(): + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with patch.object( + llm.client._client._client, + "send", + side_effect=[tool_response, final_response], + ) as _, start_transaction(): list( agent_executor.stream( {"input": "How many letters in the word eudca"}, ) ) - tx = next(item.payload for item in items if item.type == "transaction") - assert tx["type"] == "transaction" - assert tx["contexts"]["trace"]["origin"] == "manual" + tx = next(item.payload for item in items if item.type == "transaction") - spans = [item.payload for item in items if item.type == "span"] - invoke_agent_span = next( - x for x in spans if 
x["attributes"]["sentry.op"] == "gen_ai.invoke_agent" - ) - assert invoke_agent_span["attributes"]["gen_ai.function_id"] == "my-snazzy-pipeline" + assert tx["type"] == "transaction" + assert tx["contexts"]["trace"]["origin"] == "manual" + + spans = [item.payload for item in items if item.type == "span"] + invoke_agent_span = next( + x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.invoke_agent" + ) + assert ( + invoke_agent_span["attributes"]["gen_ai.function_id"] + == "my-snazzy-pipeline" + ) + else: + events = capture_events() + + with patch.object( + llm.client._client._client, + "send", + side_effect=[tool_response, final_response], + ) as _, start_transaction(): + list( + agent_executor.stream( + {"input": "How many letters in the word eudca"}, + ) + ) + + tx = events[0] + + assert tx["type"] == "transaction" + assert tx["contexts"]["trace"]["origin"] == "manual" + + invoke_agent_span = next( + x for x in tx["spans"] if x["op"] == "gen_ai.invoke_agent" + ) + assert invoke_agent_span["data"]["gen_ai.function_id"] == "my-snazzy-pipeline" -def test_langchain_error(sentry_init, capture_items): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_langchain_error( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): global llm_type llm_type = "acme-llm" @@ -1591,8 +2400,8 @@ def test_langchain_error(sentry_init, capture_items): integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("event") prompt = ChatPromptTemplate.from_messages( [ @@ -1615,57 +2424,114 @@ def test_langchain_error(sentry_init, capture_items): agent_executor = AgentExecutor(agent=agent, tools=[get_word_length], verbose=True) - with start_transaction(), pytest.raises(ValueError): - list(agent_executor.stream({"input": "How many letters in the word eudca"})) + if stream_gen_ai_spans: + items = 
capture_items("event") + + with start_transaction(), pytest.raises(ValueError): + list(agent_executor.stream({"input": "How many letters in the word eudca"})) + + (error,) = (item.payload for item in items if item.type == "event") + else: + events = capture_events() + + with start_transaction(), pytest.raises(ValueError): + list(agent_executor.stream({"input": "How many letters in the word eudca"})) - (error,) = (item.payload for item in items if item.type == "event") + error = events[0] assert error["level"] == "error" -def test_span_status_error(sentry_init, capture_items): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_span_status_error( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): global llm_type llm_type = "acme-llm" sentry_init( integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("event", "transaction", "span") + if stream_gen_ai_spans: + items = capture_items("event", "transaction", "span") + + with start_transaction(name="test"): + prompt = ChatPromptTemplate.from_messages( + [ + ( + "system", + "You are very powerful assistant, but don't know current events", + ), + ("user", "{input}"), + MessagesPlaceholder(variable_name="agent_scratchpad"), + ] + ) + llm = MockOpenAI( + model_name="gpt-3.5-turbo", + temperature=0, + openai_api_key="badkey", + ) + agent = create_openai_tools_agent(llm, [get_word_length], prompt) - with start_transaction(name="test"): - prompt = ChatPromptTemplate.from_messages( - [ - ( - "system", - "You are very powerful assistant, but don't know current events", - ), - ("user", "{input}"), - MessagesPlaceholder(variable_name="agent_scratchpad"), - ] - ) - global stream_result_mock - stream_result_mock = Mock(side_effect=ValueError("API rate limit error")) - llm = MockOpenAI( - model_name="gpt-3.5-turbo", - temperature=0, - openai_api_key="badkey", - ) - agent = 
create_openai_tools_agent(llm, [get_word_length], prompt) + agent_executor = AgentExecutor( + agent=agent, tools=[get_word_length], verbose=True + ) - agent_executor = AgentExecutor( - agent=agent, tools=[get_word_length], verbose=True - ) + with pytest.raises(ValueError): + list( + agent_executor.stream( + {"input": "How many letters in the word eudca"} + ) + ) - with pytest.raises(ValueError): - list(agent_executor.stream({"input": "How many letters in the word eudca"})) + (error,) = (item.payload for item in items if item.type == "event") + assert error["level"] == "error" - (error,) = (item.payload for item in items if item.type == "event") - assert error["level"] == "error" + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["status"] == "error" + + (transaction,) = (item.payload for item in items if item.type == "transaction") + else: + events = capture_events() + + with start_transaction(name="test"): + prompt = ChatPromptTemplate.from_messages( + [ + ( + "system", + "You are very powerful assistant, but don't know current events", + ), + ("user", "{input}"), + MessagesPlaceholder(variable_name="agent_scratchpad"), + ] + ) + llm = MockOpenAI( + model_name="gpt-3.5-turbo", + temperature=0, + openai_api_key="badkey", + ) + agent = create_openai_tools_agent(llm, [get_word_length], prompt) + + agent_executor = AgentExecutor( + agent=agent, tools=[get_word_length], verbose=True + ) + + with pytest.raises(ValueError): + list( + agent_executor.stream( + {"input": "How many letters in the word eudca"} + ) + ) - spans = [item.payload for item in items if item.type == "span"] - assert spans[0]["status"] == "error" + (error, transaction) = events + assert error["level"] == "error" + assert transaction["spans"][0]["status"] == "internal_error" + assert transaction["spans"][0]["tags"]["status"] == "internal_error" - (transaction,) = (item.payload for item in items if item.type == "transaction") assert 
transaction["contexts"]["trace"]["status"] == "internal_error" @@ -1880,7 +2746,13 @@ def test_langchain_callback_list_existing_callback(sentry_init): assert handler is sentry_callback -def test_langchain_message_role_mapping(sentry_init, capture_items): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_langchain_message_role_mapping( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """Test that message roles are properly normalized in langchain integration.""" global llm_type llm_type = "openai-chat" @@ -1889,8 +2761,8 @@ def test_langchain_message_role_mapping(sentry_init, capture_items): integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") prompt = ChatPromptTemplate.from_messages( [ @@ -1923,53 +2795,112 @@ def test_langchain_message_role_mapping(sentry_init, capture_items): # Test input that should trigger message role normalization test_input = "Hello, how are you?" 
- with start_transaction(): - list(agent_executor.stream({"input": test_input})) + message_data_found = False + if stream_gen_ai_spans: + items = capture_items("span") - spans = [item.payload for item in items if item.type == "span"] - # Find spans with gen_ai operation that should have message data - gen_ai_spans = [ - span - for span in spans - if span["attributes"].get("sentry.op", "").startswith("gen_ai") - ] + with start_transaction(): + list(agent_executor.stream({"input": test_input})) + + spans = [item.payload for item in items if item.type == "span"] + # Find spans with gen_ai operation that should have message data + gen_ai_spans = [ + span + for span in spans + if span["attributes"].get("sentry.op", "").startswith("gen_ai") + ] - # Check if any span has message data with normalized roles - message_data_found = False - for span in gen_ai_spans: - span_data = span.get("attributes", {}) - if SPANDATA.GEN_AI_REQUEST_MESSAGES in span_data: - message_data_found = True - messages_data = span_data[SPANDATA.GEN_AI_REQUEST_MESSAGES] - - # Parse the message data (might be JSON string) - if isinstance(messages_data, str): - try: - messages = json.loads(messages_data) - except json.JSONDecodeError: - # If not valid JSON, skip this assertion - continue - else: - messages = messages_data + # Check if any span has message data with normalized roles + for span in gen_ai_spans: + span_data = span.get("attributes", {}) + if SPANDATA.GEN_AI_REQUEST_MESSAGES in span_data: + message_data_found = True + messages_data = span_data[SPANDATA.GEN_AI_REQUEST_MESSAGES] + + # Parse the message data (might be JSON string) + if isinstance(messages_data, str): + try: + messages = json.loads(messages_data) + except json.JSONDecodeError: + # If not valid JSON, skip this assertion + continue + else: + messages = messages_data + + # Verify that the input message is present and contains the test input + assert isinstance(messages, list) + assert len(messages) > 0 + + # The test input should 
be in one of the messages + input_found = False + for msg in messages: + if isinstance(msg, dict) and test_input in str( + msg.get("content", "") + ): + input_found = True + break + elif isinstance(msg, str) and test_input in msg: + input_found = True + break + + assert input_found, ( + f"Test input '{test_input}' not found in messages: {messages}" + ) + break + else: + events = capture_events() - # Verify that the input message is present and contains the test input - assert isinstance(messages, list) - assert len(messages) > 0 + with start_transaction(): + list(agent_executor.stream({"input": test_input})) - # The test input should be in one of the messages - input_found = False - for msg in messages: - if isinstance(msg, dict) and test_input in str(msg.get("content", "")): - input_found = True - break - elif isinstance(msg, str) and test_input in msg: - input_found = True - break + assert len(events) > 0 + tx = events[0] + assert tx["type"] == "transaction" - assert input_found, ( - f"Test input '{test_input}' not found in messages: {messages}" - ) - break + # Find spans with gen_ai operation that should have message data + gen_ai_spans = [ + span + for span in tx.get("spans", []) + if span.get("op", "").startswith("gen_ai") + ] + + # Check if any span has message data with normalized roles + for span in gen_ai_spans: + span_data = span.get("data", {}) + if SPANDATA.GEN_AI_REQUEST_MESSAGES in span_data: + message_data_found = True + messages_data = span_data[SPANDATA.GEN_AI_REQUEST_MESSAGES] + + # Parse the message data (might be JSON string) + if isinstance(messages_data, str): + try: + messages = json.loads(messages_data) + except json.JSONDecodeError: + # If not valid JSON, skip this assertion + continue + else: + messages = messages_data + + # Verify that the input message is present and contains the test input + assert isinstance(messages, list) + assert len(messages) > 0 + + # The test input should be in one of the messages + input_found = False + for msg 
in messages: + if isinstance(msg, dict) and test_input in str( + msg.get("content", "") + ): + input_found = True + break + elif isinstance(msg, str) and test_input in msg: + input_found = True + break + + assert input_found, ( + f"Test input '{test_input}' not found in messages: {messages}" + ) + break # The message role mapping functionality is primarily tested through the normalization # that happens in the integration code. The fact that we can capture and process @@ -2018,7 +2949,13 @@ def test_langchain_message_role_normalization_units(): assert normalized[5] == "string message" # String message unchanged -def test_langchain_message_truncation(sentry_init, capture_items): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_langchain_message_truncation( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """Test that large messages are truncated properly in Langchain integration.""" from langchain_core.outputs import LLMResult, Generation @@ -2026,8 +2963,8 @@ def test_langchain_message_truncation(sentry_init, capture_items): integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") callback = SentryLangchainCallback(max_span_map_size=100, include_prompts=True) @@ -2045,48 +2982,101 @@ def test_langchain_message_truncation(sentry_init, capture_items): "small message 5", ] - with start_transaction(): - callback.on_llm_start( - serialized=serialized, - prompts=prompts, - run_id=run_id, - name="my_pipeline", - invocation_params={ - "temperature": 0.7, - "max_tokens": 100, - "model": "gpt-3.5-turbo", - }, - ) + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - response = LLMResult( - generations=[[Generation(text="The response")]], - llm_output={ - "token_usage": { - "total_tokens": 25, - "prompt_tokens": 10, - "completion_tokens": 15, - } 
- }, - ) - callback.on_llm_end(response=response, run_id=run_id) + with start_transaction(): + callback.on_llm_start( + serialized=serialized, + prompts=prompts, + run_id=run_id, + name="my_pipeline", + invocation_params={ + "temperature": 0.7, + "max_tokens": 100, + "model": "gpt-3.5-turbo", + }, + ) - tx = next(item.payload for item in items if item.type == "transaction") - assert tx["type"] == "transaction" + response = LLMResult( + generations=[[Generation(text="The response")]], + llm_output={ + "token_usage": { + "total_tokens": 25, + "prompt_tokens": 10, + "completion_tokens": 15, + } + }, + ) + callback.on_llm_end(response=response, run_id=run_id) - spans = [item.payload for item in items if item.type == "span"] - llm_spans = [ - span - for span in spans - if span["attributes"].get("sentry.op") == "gen_ai.text_completion" - ] - assert len(llm_spans) > 0 + tx = next(item.payload for item in items if item.type == "transaction") + assert tx["type"] == "transaction" + + spans = [item.payload for item in items if item.type == "span"] + llm_spans = [ + span + for span in spans + if span["attributes"].get("sentry.op") == "gen_ai.text_completion" + ] + + assert len(llm_spans) > 0 + + llm_span = llm_spans[0] + + assert llm_span["attributes"]["gen_ai.operation.name"] == "text_completion" + assert llm_span["attributes"][SPANDATA.GEN_AI_FUNCTION_ID] == "my_pipeline" + + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in llm_span["attributes"] + messages_data = llm_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + else: + events = capture_events() + + with start_transaction(): + callback.on_llm_start( + serialized=serialized, + prompts=prompts, + run_id=run_id, + name="my_pipeline", + invocation_params={ + "temperature": 0.7, + "max_tokens": 100, + "model": "gpt-3.5-turbo", + }, + ) + + response = LLMResult( + generations=[[Generation(text="The response")]], + llm_output={ + "token_usage": { + "total_tokens": 25, + "prompt_tokens": 10, + "completion_tokens": 15, + } + }, + 
) + callback.on_llm_end(response=response, run_id=run_id) + + assert len(events) > 0 + tx = events[0] + assert tx["type"] == "transaction" + + llm_spans = [ + span + for span in tx.get("spans", []) + if span.get("op") == "gen_ai.text_completion" + ] + + assert len(llm_spans) > 0 + + llm_span = llm_spans[0] + + assert llm_span["data"]["gen_ai.operation.name"] == "text_completion" + assert llm_span["data"][SPANDATA.GEN_AI_FUNCTION_ID] == "my_pipeline" - llm_span = llm_spans[0] - assert llm_span["attributes"]["gen_ai.operation.name"] == "text_completion" - assert llm_span["attributes"][SPANDATA.GEN_AI_FUNCTION_ID] == "my_pipeline" + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in llm_span["data"] + messages_data = llm_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in llm_span["attributes"] - messages_data = llm_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] assert isinstance(messages_data, str) parsed_messages = json.loads(messages_data) @@ -2096,6 +3086,7 @@ def test_langchain_message_truncation(sentry_init, capture_items): assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5 +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( "send_default_pii, include_prompts", [ @@ -2106,7 +3097,12 @@ def test_langchain_message_truncation(sentry_init, capture_items): ], ) def test_langchain_embeddings_sync( - sentry_init, capture_items, send_default_pii, include_prompts + sentry_init, + capture_events, + capture_items, + send_default_pii, + include_prompts, + stream_gen_ai_spans, ): """Test that sync embedding methods (embed_documents, embed_query) are properly traced.""" try: @@ -2118,64 +3114,129 @@ def test_langchain_embeddings_sync( integrations=[LangchainIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") + if 
stream_gen_ai_spans: + items = capture_items("span") + + # Mock the actual API call + with mock.patch.object( + OpenAIEmbeddings, + "embed_documents", + wraps=lambda self, texts: [[0.1, 0.2, 0.3] for _ in texts], + ) as mock_embed_documents: + embeddings = OpenAIEmbeddings( + model="text-embedding-ada-002", openai_api_key="test-key" + ) - # Mock the actual API call - with mock.patch.object( - OpenAIEmbeddings, - "embed_documents", - wraps=lambda self, texts: [[0.1, 0.2, 0.3] for _ in texts], - ) as mock_embed_documents: - embeddings = OpenAIEmbeddings( - model="text-embedding-ada-002", openai_api_key="test-key" - ) + # Force setup to re-run to ensure our mock is wrapped + LangchainIntegration.setup_once() - # Force setup to re-run to ensure our mock is wrapped - LangchainIntegration.setup_once() + with start_transaction(name="test_embeddings"): + # Test embed_documents + result = embeddings.embed_documents(["Hello world", "Test document"]) - with start_transaction(name="test_embeddings"): - # Test embed_documents - result = embeddings.embed_documents(["Hello world", "Test document"]) + assert len(result) == 2 + mock_embed_documents.assert_called_once() - assert len(result) == 2 - mock_embed_documents.assert_called_once() - - spans = [item.payload for item in items if item.type == "span"] - # Find embeddings span - embeddings_spans = [ - span - for span in spans - if span["attributes"].get("sentry.op") == "gen_ai.embeddings" - ] - assert len(embeddings_spans) == 1 + spans = [item.payload for item in items if item.type == "span"] + # Find embeddings span + embeddings_spans = [ + span + for span in spans + if span["attributes"].get("sentry.op") == "gen_ai.embeddings" + ] - embeddings_span = embeddings_spans[0] - assert embeddings_span["name"] == "embeddings text-embedding-ada-002" - assert embeddings_span["attributes"]["sentry.origin"] == "auto.ai.langchain" - assert embeddings_span["attributes"]["gen_ai.operation.name"] == "embeddings" - assert ( - 
embeddings_span["attributes"]["gen_ai.request.model"] - == "text-embedding-ada-002" - ) + assert len(embeddings_spans) == 1 - # Check if input is captured based on PII settings - if send_default_pii and include_prompts: - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT in embeddings_span["attributes"] - input_data = embeddings_span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] - # Could be serialized as string - if isinstance(input_data, str): - assert "Hello world" in input_data - assert "Test document" in input_data + embeddings_span = embeddings_spans[0] + assert embeddings_span["name"] == "embeddings text-embedding-ada-002" + assert embeddings_span["attributes"]["sentry.origin"] == "auto.ai.langchain" + assert embeddings_span["attributes"]["gen_ai.operation.name"] == "embeddings" + assert ( + embeddings_span["attributes"]["gen_ai.request.model"] + == "text-embedding-ada-002" + ) + + # Check if input is captured based on PII settings + if send_default_pii and include_prompts: + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT in embeddings_span["attributes"] + input_data = embeddings_span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + + # Could be serialized as string + if isinstance(input_data, str): + assert "Hello world" in input_data + assert "Test document" in input_data + else: + assert "Hello world" in input_data + assert "Test document" in input_data else: - assert "Hello world" in input_data - assert "Test document" in input_data + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in embeddings_span.get( + "attributes", {} + ) else: - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in embeddings_span.get( - "attributes", {} + events = capture_events() + + # Mock the actual API call + with mock.patch.object( + OpenAIEmbeddings, + "embed_documents", + wraps=lambda self, texts: [[0.1, 0.2, 0.3] for _ in texts], + ) as mock_embed_documents: + embeddings = OpenAIEmbeddings( + model="text-embedding-ada-002", openai_api_key="test-key" + ) + + # Force setup to re-run to ensure our 
mock is wrapped + LangchainIntegration.setup_once() + + with start_transaction(name="test_embeddings"): + # Test embed_documents + result = embeddings.embed_documents(["Hello world", "Test document"]) + + assert len(result) == 2 + mock_embed_documents.assert_called_once() + + # Check captured events + assert len(events) >= 1 + tx = events[0] + assert tx["type"] == "transaction" + + # Find embeddings span + embeddings_spans = [ + span + for span in tx.get("spans", []) + if span.get("op") == "gen_ai.embeddings" + ] + + assert len(embeddings_spans) == 1 + + embeddings_span = embeddings_spans[0] + assert embeddings_span["description"] == "embeddings text-embedding-ada-002" + assert embeddings_span["origin"] == "auto.ai.langchain" + assert embeddings_span["data"]["gen_ai.operation.name"] == "embeddings" + assert ( + embeddings_span["data"]["gen_ai.request.model"] == "text-embedding-ada-002" ) + # Check if input is captured based on PII settings + if send_default_pii and include_prompts: + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT in embeddings_span["data"] + input_data = embeddings_span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + # Could be serialized as string + if isinstance(input_data, str): + assert "Hello world" in input_data + assert "Test document" in input_data + else: + assert "Hello world" in input_data + assert "Test document" in input_data + else: + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in embeddings_span.get( + "data", {} + ) + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( "send_default_pii, include_prompts", [ @@ -2184,7 +3245,12 @@ def test_langchain_embeddings_sync( ], ) def test_langchain_embeddings_embed_query( - sentry_init, capture_items, send_default_pii, include_prompts + sentry_init, + capture_events, + capture_items, + send_default_pii, + include_prompts, + stream_gen_ai_spans, ): """Test that embed_query method is properly traced.""" try: @@ -2196,59 +3262,121 @@ def 
test_langchain_embeddings_embed_query( integrations=[LangchainIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") + if stream_gen_ai_spans: + items = capture_items("span") + + # Mock the actual API call + with mock.patch.object( + OpenAIEmbeddings, + "embed_query", + wraps=lambda self, text: [0.1, 0.2, 0.3], + ) as mock_embed_query: + embeddings = OpenAIEmbeddings( + model="text-embedding-ada-002", openai_api_key="test-key" + ) - # Mock the actual API call - with mock.patch.object( - OpenAIEmbeddings, - "embed_query", - wraps=lambda self, text: [0.1, 0.2, 0.3], - ) as mock_embed_query: - embeddings = OpenAIEmbeddings( - model="text-embedding-ada-002", openai_api_key="test-key" - ) + # Force setup to re-run to ensure our mock is wrapped + LangchainIntegration.setup_once() - # Force setup to re-run to ensure our mock is wrapped - LangchainIntegration.setup_once() + with start_transaction(name="test_embeddings_query"): + result = embeddings.embed_query("What is the capital of France?") - with start_transaction(name="test_embeddings_query"): - result = embeddings.embed_query("What is the capital of France?") + assert len(result) == 3 + mock_embed_query.assert_called_once() - assert len(result) == 3 - mock_embed_query.assert_called_once() + spans = [item.payload for item in items if item.type == "span"] + # Find embeddings span + embeddings_spans = [ + span + for span in spans + if span["attributes"].get("sentry.op") == "gen_ai.embeddings" + ] - spans = [item.payload for item in items if item.type == "span"] - # Find embeddings span - embeddings_spans = [ - span - for span in spans - if span["attributes"].get("sentry.op") == "gen_ai.embeddings" - ] - assert len(embeddings_spans) == 1 + assert len(embeddings_spans) == 1 - embeddings_span = embeddings_spans[0] - assert embeddings_span["attributes"]["gen_ai.operation.name"] == 
"embeddings" - assert ( - embeddings_span["attributes"]["gen_ai.request.model"] - == "text-embedding-ada-002" - ) + embeddings_span = embeddings_spans[0] - # Check if input is captured based on PII settings - if send_default_pii and include_prompts: - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT in embeddings_span["attributes"] - input_data = embeddings_span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] - # Could be serialized as string - if isinstance(input_data, str): - assert "What is the capital of France?" in input_data + assert embeddings_span["attributes"]["gen_ai.operation.name"] == "embeddings" + assert ( + embeddings_span["attributes"]["gen_ai.request.model"] + == "text-embedding-ada-002" + ) + + # Check if input is captured based on PII settings + if send_default_pii and include_prompts: + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT in embeddings_span["attributes"] + input_data = embeddings_span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + + # Could be serialized as string + if isinstance(input_data, str): + assert "What is the capital of France?" in input_data + else: + assert "What is the capital of France?" in input_data else: - assert "What is the capital of France?" 
in input_data + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in embeddings_span.get( + "attributes", {} + ) else: - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in embeddings_span.get( - "attributes", {} + events = capture_events() + + # Mock the actual API call + with mock.patch.object( + OpenAIEmbeddings, + "embed_query", + wraps=lambda self, text: [0.1, 0.2, 0.3], + ) as mock_embed_query: + embeddings = OpenAIEmbeddings( + model="text-embedding-ada-002", openai_api_key="test-key" + ) + + # Force setup to re-run to ensure our mock is wrapped + LangchainIntegration.setup_once() + + with start_transaction(name="test_embeddings_query"): + result = embeddings.embed_query("What is the capital of France?") + + assert len(result) == 3 + mock_embed_query.assert_called_once() + + # Check captured events + assert len(events) >= 1 + tx = events[0] + assert tx["type"] == "transaction" + + # Find embeddings span + embeddings_spans = [ + span + for span in tx.get("spans", []) + if span.get("op") == "gen_ai.embeddings" + ] + + assert len(embeddings_spans) == 1 + + embeddings_span = embeddings_spans[0] + + assert embeddings_span["data"]["gen_ai.operation.name"] == "embeddings" + assert ( + embeddings_span["data"]["gen_ai.request.model"] == "text-embedding-ada-002" ) + # Check if input is captured based on PII settings + if send_default_pii and include_prompts: + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT in embeddings_span["data"] + input_data = embeddings_span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + # Could be serialized as string + if isinstance(input_data, str): + assert "What is the capital of France?" in input_data + else: + assert "What is the capital of France?" 
in input_data + else: + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in embeddings_span.get( + "data", {} + ) + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( "send_default_pii, include_prompts", [ @@ -2258,7 +3386,12 @@ def test_langchain_embeddings_embed_query( ) @pytest.mark.asyncio async def test_langchain_embeddings_async( - sentry_init, capture_items, send_default_pii, include_prompts + sentry_init, + capture_events, + capture_items, + send_default_pii, + include_prompts, + stream_gen_ai_spans, ): """Test that async embedding methods (aembed_documents, aembed_query) are properly traced.""" try: @@ -2270,68 +3403,147 @@ async def test_langchain_embeddings_async( integrations=[LangchainIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") async def mock_aembed_documents(self, texts): return [[0.1, 0.2, 0.3] for _ in texts] - # Mock the actual API call - with mock.patch.object( - OpenAIEmbeddings, - "aembed_documents", - wraps=mock_aembed_documents, - ) as mock_aembed: - embeddings = OpenAIEmbeddings( - model="text-embedding-ada-002", openai_api_key="test-key" - ) + if stream_gen_ai_spans: + items = capture_items("span") + + # Mock the actual API call + with mock.patch.object( + OpenAIEmbeddings, + "aembed_documents", + wraps=mock_aembed_documents, + ) as mock_aembed: + embeddings = OpenAIEmbeddings( + model="text-embedding-ada-002", openai_api_key="test-key" + ) - # Force setup to re-run to ensure our mock is wrapped - LangchainIntegration.setup_once() + # Force setup to re-run to ensure our mock is wrapped + LangchainIntegration.setup_once() - with start_transaction(name="test_async_embeddings"): - result = await embeddings.aembed_documents( - ["Async hello", "Async test document"] - ) + with start_transaction(name="test_async_embeddings"): + result = await 
embeddings.aembed_documents( + ["Async hello", "Async test document"] + ) - assert len(result) == 2 - mock_aembed.assert_called_once() - - spans = [item.payload for item in items if item.type == "span"] - # Find embeddings span - embeddings_spans = [ - span - for span in spans - if span["attributes"].get("sentry.op") == "gen_ai.embeddings" - ] - assert len(embeddings_spans) == 1 + assert len(result) == 2 + mock_aembed.assert_called_once() - embeddings_span = embeddings_spans[0] - assert embeddings_span["name"] == "embeddings text-embedding-ada-002" - assert embeddings_span["attributes"]["sentry.origin"] == "auto.ai.langchain" - assert embeddings_span["attributes"]["gen_ai.operation.name"] == "embeddings" - assert ( - embeddings_span["attributes"]["gen_ai.request.model"] - == "text-embedding-ada-002" - ) + spans = [item.payload for item in items if item.type == "span"] + # Find embeddings span + embeddings_spans = [ + span + for span in spans + if span["attributes"].get("sentry.op") == "gen_ai.embeddings" + ] - # Check if input is captured based on PII settings - if send_default_pii and include_prompts: - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT in embeddings_span["attributes"] - input_data = embeddings_span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] - # Could be serialized as string - if isinstance(input_data, str): - assert "Async hello" in input_data or "Async test document" in input_data + assert len(embeddings_spans) == 1 + + embeddings_span = embeddings_spans[0] + assert embeddings_span["name"] == "embeddings text-embedding-ada-002" + assert embeddings_span["attributes"]["sentry.origin"] == "auto.ai.langchain" + assert embeddings_span["attributes"]["gen_ai.operation.name"] == "embeddings" + assert ( + embeddings_span["attributes"]["gen_ai.request.model"] + == "text-embedding-ada-002" + ) + + # Check if input is captured based on PII settings + if send_default_pii and include_prompts: + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT in 
embeddings_span["attributes"] + input_data = embeddings_span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + + # Could be serialized as string + if isinstance(input_data, str): + assert ( + "Async hello" in input_data or "Async test document" in input_data + ) + else: + assert ( + "Async hello" in input_data or "Async test document" in input_data + ) else: - assert "Async hello" in input_data or "Async test document" in input_data + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in embeddings_span.get( + "attributes", {} + ) + else: - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in embeddings_span.get( - "attributes", {} + events = capture_events() + + # Mock the actual API call + with mock.patch.object( + OpenAIEmbeddings, + "aembed_documents", + wraps=mock_aembed_documents, + ) as mock_aembed: + embeddings = OpenAIEmbeddings( + model="text-embedding-ada-002", openai_api_key="test-key" + ) + + # Force setup to re-run to ensure our mock is wrapped + LangchainIntegration.setup_once() + + with start_transaction(name="test_async_embeddings"): + result = await embeddings.aembed_documents( + ["Async hello", "Async test document"] + ) + + assert len(result) == 2 + mock_aembed.assert_called_once() + + # Check captured events + assert len(events) >= 1 + tx = events[0] + assert tx["type"] == "transaction" + + # Find embeddings span + embeddings_spans = [ + span + for span in tx.get("spans", []) + if span.get("op") == "gen_ai.embeddings" + ] + + assert len(embeddings_spans) == 1 + + embeddings_span = embeddings_spans[0] + assert embeddings_span["description"] == "embeddings text-embedding-ada-002" + assert embeddings_span["origin"] == "auto.ai.langchain" + assert embeddings_span["data"]["gen_ai.operation.name"] == "embeddings" + assert ( + embeddings_span["data"]["gen_ai.request.model"] == "text-embedding-ada-002" ) + # Check if input is captured based on PII settings + if send_default_pii and include_prompts: + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT in 
embeddings_span["data"] + input_data = embeddings_span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + # Could be serialized as string + if isinstance(input_data, str): + assert ( + "Async hello" in input_data or "Async test document" in input_data + ) + else: + assert ( + "Async hello" in input_data or "Async test document" in input_data + ) + else: + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in embeddings_span.get( + "data", {} + ) + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio -async def test_langchain_embeddings_aembed_query(sentry_init, capture_items): +async def test_langchain_embeddings_aembed_query( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """Test that aembed_query method is properly traced.""" try: from langchain_openai import OpenAIEmbeddings @@ -2342,50 +3554,100 @@ async def test_langchain_embeddings_aembed_query(sentry_init, capture_items): integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") async def mock_aembed_query(self, text): return [0.1, 0.2, 0.3] - # Mock the actual API call - with mock.patch.object( - OpenAIEmbeddings, - "aembed_query", - wraps=mock_aembed_query, - ) as mock_aembed: - embeddings = OpenAIEmbeddings( - model="text-embedding-ada-002", openai_api_key="test-key" + if stream_gen_ai_spans: + items = capture_items("span") + + # Mock the actual API call + with mock.patch.object( + OpenAIEmbeddings, + "aembed_query", + wraps=mock_aembed_query, + ) as mock_aembed: + embeddings = OpenAIEmbeddings( + model="text-embedding-ada-002", openai_api_key="test-key" + ) + + # Force setup to re-run to ensure our mock is wrapped + LangchainIntegration.setup_once() + + with start_transaction(name="test_async_embeddings_query"): + result = await embeddings.aembed_query("Async query test") + + assert len(result) == 3 + 
mock_aembed.assert_called_once() + + spans = [item.payload for item in items if item.type == "span"] + # Find embeddings span + embeddings_spans = [ + span + for span in spans + if span["attributes"].get("sentry.op") == "gen_ai.embeddings" + ] + + assert len(embeddings_spans) == 1 + + embeddings_span = embeddings_spans[0] + assert embeddings_span["attributes"]["gen_ai.operation.name"] == "embeddings" + assert ( + embeddings_span["attributes"]["gen_ai.request.model"] + == "text-embedding-ada-002" ) - # Force setup to re-run to ensure our mock is wrapped - LangchainIntegration.setup_once() + # Check if input is captured + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT in embeddings_span["attributes"] + input_data = embeddings_span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + else: + events = capture_events() + + # Mock the actual API call + with mock.patch.object( + OpenAIEmbeddings, + "aembed_query", + wraps=mock_aembed_query, + ) as mock_aembed: + embeddings = OpenAIEmbeddings( + model="text-embedding-ada-002", openai_api_key="test-key" + ) - with start_transaction(name="test_async_embeddings_query"): - result = await embeddings.aembed_query("Async query test") + # Force setup to re-run to ensure our mock is wrapped + LangchainIntegration.setup_once() - assert len(result) == 3 - mock_aembed.assert_called_once() + with start_transaction(name="test_async_embeddings_query"): + result = await embeddings.aembed_query("Async query test") - spans = [item.payload for item in items if item.type == "span"] - # Find embeddings span - embeddings_spans = [ - span - for span in spans - if span["attributes"].get("sentry.op") == "gen_ai.embeddings" - ] - assert len(embeddings_spans) == 1 + assert len(result) == 3 + mock_aembed.assert_called_once() - embeddings_span = embeddings_spans[0] - assert embeddings_span["attributes"]["gen_ai.operation.name"] == "embeddings" - assert ( - embeddings_span["attributes"]["gen_ai.request.model"] - == "text-embedding-ada-002" - ) + # Check captured 
events + assert len(events) >= 1 + tx = events[0] + assert tx["type"] == "transaction" + + # Find embeddings span + embeddings_spans = [ + span + for span in tx.get("spans", []) + if span.get("op") == "gen_ai.embeddings" + ] + + assert len(embeddings_spans) == 1 + + embeddings_span = embeddings_spans[0] + assert embeddings_span["data"]["gen_ai.operation.name"] == "embeddings" + assert ( + embeddings_span["data"]["gen_ai.request.model"] == "text-embedding-ada-002" + ) + + # Check if input is captured + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT in embeddings_span["data"] + input_data = embeddings_span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] - # Check if input is captured - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT in embeddings_span["attributes"] - input_data = embeddings_span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] # Could be serialized as string if isinstance(input_data, str): assert "Async query test" in input_data @@ -2393,7 +3655,13 @@ async def mock_aembed_query(self, text): assert "Async query test" in input_data -def test_langchain_embeddings_no_model_name(sentry_init, capture_items): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_langchain_embeddings_no_model_name( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """Test embeddings when model name is not available.""" try: from langchain_openai import OpenAIEmbeddings @@ -2403,81 +3671,169 @@ def test_langchain_embeddings_no_model_name(sentry_init, capture_items): sentry_init( integrations=[LangchainIntegration(include_prompts=False)], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") + if stream_gen_ai_spans: + items = capture_items("span") + + # Mock the actual API call and remove model attribute + with mock.patch.object( + OpenAIEmbeddings, + "embed_documents", + wraps=lambda self, texts: [[0.1, 0.2, 0.3] for _ in texts], + ): + embeddings = 
OpenAIEmbeddings(openai_api_key="test-key") + # Remove model attribute to test fallback + delattr(embeddings, "model") + if hasattr(embeddings, "model_name"): + delattr(embeddings, "model_name") - # Mock the actual API call and remove model attribute - with mock.patch.object( - OpenAIEmbeddings, - "embed_documents", - wraps=lambda self, texts: [[0.1, 0.2, 0.3] for _ in texts], - ): - embeddings = OpenAIEmbeddings(openai_api_key="test-key") - # Remove model attribute to test fallback - delattr(embeddings, "model") - if hasattr(embeddings, "model_name"): - delattr(embeddings, "model_name") + # Force setup to re-run to ensure our mock is wrapped + LangchainIntegration.setup_once() - # Force setup to re-run to ensure our mock is wrapped - LangchainIntegration.setup_once() + with start_transaction(name="test_embeddings_no_model"): + embeddings.embed_documents(["Test"]) - with start_transaction(name="test_embeddings_no_model"): - embeddings.embed_documents(["Test"]) + spans = [item.payload for item in items if item.type == "span"] + # Find embeddings span + embeddings_spans = [ + span + for span in spans + if span["attributes"].get("sentry.op") == "gen_ai.embeddings" + ] - spans = [item.payload for item in items if item.type == "span"] - # Find embeddings span - embeddings_spans = [ - span - for span in spans - if span["attributes"].get("sentry.op") == "gen_ai.embeddings" - ] - assert len(embeddings_spans) == 1 + assert len(embeddings_spans) == 1 - embeddings_span = embeddings_spans[0] - assert embeddings_span["name"] == "embeddings" - assert embeddings_span["attributes"]["gen_ai.operation.name"] == "embeddings" - # Model name should not be set if not available - assert ( - "gen_ai.request.model" not in embeddings_span["attributes"] - or embeddings_span["attributes"]["gen_ai.request.model"] is None - ) + embeddings_span = embeddings_spans[0] + assert embeddings_span["name"] == "embeddings" + assert embeddings_span["attributes"]["gen_ai.operation.name"] == "embeddings" + 
# Model name should not be set if not available + assert ( + "gen_ai.request.model" not in embeddings_span["attributes"] + or embeddings_span["attributes"]["gen_ai.request.model"] is None + ) + else: + events = capture_events() + + # Mock the actual API call and remove model attribute + with mock.patch.object( + OpenAIEmbeddings, + "embed_documents", + wraps=lambda self, texts: [[0.1, 0.2, 0.3] for _ in texts], + ): + embeddings = OpenAIEmbeddings(openai_api_key="test-key") + # Remove model attribute to test fallback + delattr(embeddings, "model") + if hasattr(embeddings, "model_name"): + delattr(embeddings, "model_name") + + # Force setup to re-run to ensure our mock is wrapped + LangchainIntegration.setup_once() + + with start_transaction(name="test_embeddings_no_model"): + embeddings.embed_documents(["Test"]) + + # Check captured events + assert len(events) >= 1 + tx = events[0] + assert tx["type"] == "transaction" + + # Find embeddings span + embeddings_spans = [ + span + for span in tx.get("spans", []) + if span.get("op") == "gen_ai.embeddings" + ] + + assert len(embeddings_spans) == 1 + embeddings_span = embeddings_spans[0] + assert embeddings_span["description"] == "embeddings" + assert embeddings_span["data"]["gen_ai.operation.name"] == "embeddings" + # Model name should not be set if not available + assert ( + "gen_ai.request.model" not in embeddings_span["data"] + or embeddings_span["data"]["gen_ai.request.model"] is None + ) -def test_langchain_embeddings_integration_disabled(sentry_init, capture_items): + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_langchain_embeddings_integration_disabled( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """Test that embeddings are not traced when integration is disabled.""" try: from langchain_openai import OpenAIEmbeddings except ImportError: pytest.skip("langchain_openai not installed") + sentry_init( + traces_sample_rate=1.0, + 
_experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) + # Initialize without LangchainIntegration - sentry_init(traces_sample_rate=1.0, _experiments={"gen_ai_as_v2_spans": True}) - items = capture_items("span") + if stream_gen_ai_spans: + items = capture_items("span") - with mock.patch.object( - OpenAIEmbeddings, - "embed_documents", - return_value=[[0.1, 0.2, 0.3]], - ): - embeddings = OpenAIEmbeddings( - model="text-embedding-ada-002", openai_api_key="test-key" - ) + with mock.patch.object( + OpenAIEmbeddings, + "embed_documents", + return_value=[[0.1, 0.2, 0.3]], + ): + embeddings = OpenAIEmbeddings( + model="text-embedding-ada-002", openai_api_key="test-key" + ) - with start_transaction(name="test_embeddings_disabled"): - embeddings.embed_documents(["Test"]) + with start_transaction(name="test_embeddings_disabled"): + embeddings.embed_documents(["Test"]) - # Check that no embeddings spans were created - spans = [item.payload for item in items if item.type == "span"] - embeddings_spans = [ - span - for span in spans - if span["attributes"].get("sentry.op") == "gen_ai.embeddings" - ] - # Should be empty since integration is disabled - assert len(embeddings_spans) == 0 + # Check that no embeddings spans were created + spans = [item.payload for item in items if item.type == "span"] + embeddings_spans = [ + span + for span in spans + if span["attributes"].get("sentry.op") == "gen_ai.embeddings" + ] + # Should be empty since integration is disabled + assert len(embeddings_spans) == 0 + else: + events = capture_events() + with mock.patch.object( + OpenAIEmbeddings, + "embed_documents", + return_value=[[0.1, 0.2, 0.3]], + ): + embeddings = OpenAIEmbeddings( + model="text-embedding-ada-002", openai_api_key="test-key" + ) + + with start_transaction(name="test_embeddings_disabled"): + embeddings.embed_documents(["Test"]) -def test_langchain_embeddings_multiple_providers(sentry_init, capture_items): + # Check that no embeddings spans were created + if events: + tx 
= events[0] + embeddings_spans = [ + span + for span in tx.get("spans", []) + if span.get("op") == "gen_ai.embeddings" + ] + # Should be empty since integration is disabled + assert len(embeddings_spans) == 0 + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_langchain_embeddings_multiple_providers( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """Test that embeddings work with different providers.""" try: from langchain_openai import OpenAIEmbeddings, AzureOpenAIEmbeddings @@ -2488,53 +3844,107 @@ def test_langchain_embeddings_multiple_providers(sentry_init, capture_items): integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") + if stream_gen_ai_spans: + items = capture_items("span") + + # Mock both providers + with mock.patch.object( + OpenAIEmbeddings, + "embed_documents", + wraps=lambda self, texts: [[0.1, 0.2, 0.3] for _ in texts], + ), mock.patch.object( + AzureOpenAIEmbeddings, + "embed_documents", + wraps=lambda self, texts: [[0.4, 0.5, 0.6] for _ in texts], + ): + openai_embeddings = OpenAIEmbeddings( + model="text-embedding-ada-002", openai_api_key="test-key" + ) + azure_embeddings = AzureOpenAIEmbeddings( + model="text-embedding-ada-002", + azure_endpoint="https://test.openai.azure.com/", + openai_api_key="test-key", + ) - # Mock both providers - with mock.patch.object( - OpenAIEmbeddings, - "embed_documents", - wraps=lambda self, texts: [[0.1, 0.2, 0.3] for _ in texts], - ), mock.patch.object( - AzureOpenAIEmbeddings, - "embed_documents", - wraps=lambda self, texts: [[0.4, 0.5, 0.6] for _ in texts], - ): - openai_embeddings = OpenAIEmbeddings( - model="text-embedding-ada-002", openai_api_key="test-key" - ) - azure_embeddings = AzureOpenAIEmbeddings( - model="text-embedding-ada-002", - azure_endpoint="https://test.openai.azure.com/", - 
openai_api_key="test-key", - ) + # Force setup to re-run + LangchainIntegration.setup_once() - # Force setup to re-run - LangchainIntegration.setup_once() + with start_transaction(name="test_multiple_providers"): + openai_embeddings.embed_documents(["OpenAI test"]) + azure_embeddings.embed_documents(["Azure test"]) + + spans = [item.payload for item in items if item.type == "span"] + # Find embeddings spans + embeddings_spans = [ + span + for span in spans + if span["attributes"].get("sentry.op") == "gen_ai.embeddings" + ] - with start_transaction(name="test_multiple_providers"): - openai_embeddings.embed_documents(["OpenAI test"]) - azure_embeddings.embed_documents(["Azure test"]) + # Should have 2 spans, one for each provider + assert len(embeddings_spans) == 2 - spans = [item.payload for item in items if item.type == "span"] - # Find embeddings spans - embeddings_spans = [ - span - for span in spans - if span["attributes"].get("sentry.op") == "gen_ai.embeddings" - ] - # Should have 2 spans, one for each provider - assert len(embeddings_spans) == 2 + # Verify both spans have proper data + for span in embeddings_spans: + assert span["attributes"]["gen_ai.operation.name"] == "embeddings" + assert ( + span["attributes"]["gen_ai.request.model"] == "text-embedding-ada-002" + ) + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT in span["attributes"] + else: + events = capture_events() + + # Mock both providers + with mock.patch.object( + OpenAIEmbeddings, + "embed_documents", + wraps=lambda self, texts: [[0.1, 0.2, 0.3] for _ in texts], + ), mock.patch.object( + AzureOpenAIEmbeddings, + "embed_documents", + wraps=lambda self, texts: [[0.4, 0.5, 0.6] for _ in texts], + ): + openai_embeddings = OpenAIEmbeddings( + model="text-embedding-ada-002", openai_api_key="test-key" + ) + azure_embeddings = AzureOpenAIEmbeddings( + model="text-embedding-ada-002", + azure_endpoint="https://test.openai.azure.com/", + openai_api_key="test-key", + ) + + # Force setup to re-run + 
LangchainIntegration.setup_once() - # Verify both spans have proper data - for span in embeddings_spans: - assert span["attributes"]["gen_ai.operation.name"] == "embeddings" - assert span["attributes"]["gen_ai.request.model"] == "text-embedding-ada-002" - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT in span["attributes"] + with start_transaction(name="test_multiple_providers"): + openai_embeddings.embed_documents(["OpenAI test"]) + azure_embeddings.embed_documents(["Azure test"]) + # Check captured events + assert len(events) >= 1 + tx = events[0] + assert tx["type"] == "transaction" -def test_langchain_embeddings_error_handling(sentry_init, capture_items): + # Find embeddings spans + embeddings_spans = [ + span + for span in tx.get("spans", []) + if span.get("op") == "gen_ai.embeddings" + ] + + # Should have 2 spans, one for each provider + assert len(embeddings_spans) == 2 + + # Verify both spans have proper data + for span in embeddings_spans: + assert span["data"]["gen_ai.operation.name"] == "embeddings" + assert span["data"]["gen_ai.request.model"] == "text-embedding-ada-002" + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT in span["data"] + + +def test_langchain_embeddings_error_handling(sentry_init, capture_events): """Test that errors in embeddings are properly captured.""" try: from langchain_openai import OpenAIEmbeddings @@ -2546,7 +3956,7 @@ def test_langchain_embeddings_error_handling(sentry_init, capture_items): traces_sample_rate=1.0, send_default_pii=True, ) - items = capture_items("event") + events = capture_events() # Mock the API call to raise an error with mock.patch.object( @@ -2561,20 +3971,24 @@ def test_langchain_embeddings_error_handling(sentry_init, capture_items): # Force setup to re-run LangchainIntegration.setup_once() - with start_transaction(name="test_embeddings_error"): - with pytest.raises(ValueError): - embeddings.embed_documents(["Test"]) + with start_transaction(name="test_embeddings_error"), pytest.raises(ValueError): + 
embeddings.embed_documents(["Test"]) - [ - item.payload - for item in items - if item.type == "event" and item.payload.get("level") == "error" - ] + # The error should be captured + assert len(events) >= 1 + # We should have both the transaction and potentially an error event + [e for e in events if e.get("level") == "error"] # Note: errors might not be auto-captured depending on SDK settings, # but the span should still be created -def test_langchain_embeddings_multiple_calls(sentry_init, capture_items): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_langchain_embeddings_multiple_calls( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """Test that multiple embeddings calls within a transaction are all traced.""" try: from langchain_openai import OpenAIEmbeddings @@ -2585,59 +3999,122 @@ def test_langchain_embeddings_multiple_calls(sentry_init, capture_items): integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") + if stream_gen_ai_spans: + items = capture_items("span") + + # Mock the actual API calls + with mock.patch.object( + OpenAIEmbeddings, + "embed_documents", + wraps=lambda self, texts: [[0.1, 0.2, 0.3] for _ in texts], + ), mock.patch.object( + OpenAIEmbeddings, + "embed_query", + wraps=lambda self, text: [0.4, 0.5, 0.6], + ): + embeddings = OpenAIEmbeddings( + model="text-embedding-ada-002", openai_api_key="test-key" + ) - # Mock the actual API calls - with mock.patch.object( - OpenAIEmbeddings, - "embed_documents", - wraps=lambda self, texts: [[0.1, 0.2, 0.3] for _ in texts], - ), mock.patch.object( - OpenAIEmbeddings, - "embed_query", - wraps=lambda self, text: [0.4, 0.5, 0.6], - ): - embeddings = OpenAIEmbeddings( - model="text-embedding-ada-002", openai_api_key="test-key" - ) + # Force setup to re-run + LangchainIntegration.setup_once() + + with 
start_transaction(name="test_multiple_embeddings"): + # Call embed_documents + embeddings.embed_documents(["First batch", "Second batch"]) + # Call embed_query + embeddings.embed_query("Single query") + # Call embed_documents again + embeddings.embed_documents(["Third batch"]) + + spans = [item.payload for item in items if item.type == "span"] + # Find embeddings spans - should have 3 (2 embed_documents + 1 embed_query) + embeddings_spans = [ + span + for span in spans + if span["attributes"].get("sentry.op") == "gen_ai.embeddings" + ] - # Force setup to re-run - LangchainIntegration.setup_once() + assert len(embeddings_spans) == 3 - with start_transaction(name="test_multiple_embeddings"): - # Call embed_documents - embeddings.embed_documents(["First batch", "Second batch"]) - # Call embed_query - embeddings.embed_query("Single query") - # Call embed_documents again - embeddings.embed_documents(["Third batch"]) - - spans = [item.payload for item in items if item.type == "span"] - # Find embeddings spans - should have 3 (2 embed_documents + 1 embed_query) - embeddings_spans = [ - span - for span in spans - if span["attributes"].get("sentry.op") == "gen_ai.embeddings" - ] - assert len(embeddings_spans) == 3 - - # Verify all spans have proper data - for span in embeddings_spans: - assert span["attributes"]["gen_ai.operation.name"] == "embeddings" - assert span["attributes"]["gen_ai.request.model"] == "text-embedding-ada-002" - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT in span["attributes"] - - # Verify the input data is different for each span - input_data_list = [ - span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] - for span in embeddings_spans - ] + # Verify all spans have proper data + for span in embeddings_spans: + assert span["attributes"]["gen_ai.operation.name"] == "embeddings" + assert ( + span["attributes"]["gen_ai.request.model"] == "text-embedding-ada-002" + ) + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT in span["attributes"] + + # Verify the input data is 
different for each span + input_data_list = [ + span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + for span in embeddings_spans + ] + else: + events = capture_events() + + # Mock the actual API calls + with mock.patch.object( + OpenAIEmbeddings, + "embed_documents", + wraps=lambda self, texts: [[0.1, 0.2, 0.3] for _ in texts], + ), mock.patch.object( + OpenAIEmbeddings, + "embed_query", + wraps=lambda self, text: [0.4, 0.5, 0.6], + ): + embeddings = OpenAIEmbeddings( + model="text-embedding-ada-002", openai_api_key="test-key" + ) + + # Force setup to re-run + LangchainIntegration.setup_once() + + with start_transaction(name="test_multiple_embeddings"): + # Call embed_documents + embeddings.embed_documents(["First batch", "Second batch"]) + # Call embed_query + embeddings.embed_query("Single query") + # Call embed_documents again + embeddings.embed_documents(["Third batch"]) + + # Check captured events + assert len(events) >= 1 + tx = events[0] + assert tx["type"] == "transaction" + + # Find embeddings spans - should have 3 (2 embed_documents + 1 embed_query) + embeddings_spans = [ + span + for span in tx.get("spans", []) + if span.get("op") == "gen_ai.embeddings" + ] + + assert len(embeddings_spans) == 3 + + # Verify all spans have proper data + for span in embeddings_spans: + assert span["data"]["gen_ai.operation.name"] == "embeddings" + assert span["data"]["gen_ai.request.model"] == "text-embedding-ada-002" + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT in span["data"] + + # Verify the input data is different for each span + input_data_list = [ + span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] for span in embeddings_spans + ] # They should all be different (different inputs) assert len(set(str(data) for data in input_data_list)) == 3 -def test_langchain_embeddings_span_hierarchy(sentry_init, capture_items): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_langchain_embeddings_span_hierarchy( + sentry_init, + capture_events, + capture_items, 
+ stream_gen_ai_spans, +): """Test that embeddings spans are properly nested within parent spans.""" try: from langchain_openai import OpenAIEmbeddings @@ -2648,49 +4125,106 @@ def test_langchain_embeddings_span_hierarchy(sentry_init, capture_items): integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + # Mock the actual API call + with mock.patch.object( + OpenAIEmbeddings, + "embed_documents", + wraps=lambda self, texts: [[0.1, 0.2, 0.3] for _ in texts], + ): + embeddings = OpenAIEmbeddings( + model="text-embedding-ada-002", openai_api_key="test-key" + ) - # Mock the actual API call - with mock.patch.object( - OpenAIEmbeddings, - "embed_documents", - wraps=lambda self, texts: [[0.1, 0.2, 0.3] for _ in texts], - ): - embeddings = OpenAIEmbeddings( - model="text-embedding-ada-002", openai_api_key="test-key" - ) + # Force setup to re-run + LangchainIntegration.setup_once() - # Force setup to re-run - LangchainIntegration.setup_once() + with start_transaction(name="test_span_hierarchy"), sentry_sdk.start_span( + op="custom", name="custom operation" + ): + embeddings.embed_documents(["Test within custom span"]) + + spans = [item.payload for item in items if item.type == "span"] + # Find all spans + embeddings_spans = [ + span + for span in spans + if span["attributes"].get("sentry.op") == "gen_ai.embeddings" + ] + + tx = next(item.payload for item in items if item.type == "transaction") + + custom_spans = [ + span for span in tx.get("spans", []) if span.get("op") == "custom" + ] + + assert len(embeddings_spans) == 1 + assert len(custom_spans) == 1 + + # Both spans should exist + embeddings_span = embeddings_spans[0] + custom_span = custom_spans[0] + + assert embeddings_span["attributes"]["gen_ai.operation.name"] == "embeddings" + 
else: + events = capture_events() - with start_transaction(name="test_span_hierarchy"): - with sentry_sdk.start_span(op="custom", name="custom operation"): + # Mock the actual API call + with mock.patch.object( + OpenAIEmbeddings, + "embed_documents", + wraps=lambda self, texts: [[0.1, 0.2, 0.3] for _ in texts], + ): + embeddings = OpenAIEmbeddings( + model="text-embedding-ada-002", openai_api_key="test-key" + ) + + # Force setup to re-run + LangchainIntegration.setup_once() + + with start_transaction(name="test_span_hierarchy"), sentry_sdk.start_span( + op="custom", name="custom operation" + ): embeddings.embed_documents(["Test within custom span"]) - spans = [item.payload for item in items if item.type == "span"] - # Find all spans - embeddings_spans = [ - span - for span in spans - if span["attributes"].get("sentry.op") == "gen_ai.embeddings" - ] + # Check captured events + assert len(events) >= 1 + tx = events[0] + assert tx["type"] == "transaction" + + # Find all spans + embeddings_spans = [ + span + for span in tx.get("spans", []) + if span.get("op") == "gen_ai.embeddings" + ] - tx = next(item.payload for item in items if item.type == "transaction") - custom_spans = [span for span in tx.get("spans", []) if span.get("op") == "custom"] + custom_spans = [ + span for span in tx.get("spans", []) if span.get("op") == "custom" + ] - assert len(embeddings_spans) == 1 - assert len(custom_spans) == 1 + assert len(embeddings_spans) == 1 + assert len(custom_spans) == 1 - # Both spans should exist - embeddings_span = embeddings_spans[0] - custom_span = custom_spans[0] + # Both spans should exist + embeddings_span = embeddings_spans[0] + custom_span = custom_spans[0] - assert embeddings_span["attributes"]["gen_ai.operation.name"] == "embeddings" + assert embeddings_span["data"]["gen_ai.operation.name"] == "embeddings" assert custom_span["description"] == "custom operation" -def test_langchain_embeddings_with_list_and_string_inputs(sentry_init, capture_items): 
+@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_langchain_embeddings_with_list_and_string_inputs( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """Test that embeddings correctly handle both list and string inputs.""" try: from langchain_openai import OpenAIEmbeddings @@ -2701,53 +4235,111 @@ def test_langchain_embeddings_with_list_and_string_inputs(sentry_init, capture_i integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") - - # Mock the actual API calls - with mock.patch.object( - OpenAIEmbeddings, - "embed_documents", - wraps=lambda self, texts: [[0.1, 0.2, 0.3] for _ in texts], - ), mock.patch.object( - OpenAIEmbeddings, - "embed_query", - wraps=lambda self, text: [0.4, 0.5, 0.6], - ): - embeddings = OpenAIEmbeddings( - model="text-embedding-ada-002", openai_api_key="test-key" - ) + if stream_gen_ai_spans: + items = capture_items("span") + + # Mock the actual API calls + with mock.patch.object( + OpenAIEmbeddings, + "embed_documents", + wraps=lambda self, texts: [[0.1, 0.2, 0.3] for _ in texts], + ), mock.patch.object( + OpenAIEmbeddings, + "embed_query", + wraps=lambda self, text: [0.4, 0.5, 0.6], + ): + embeddings = OpenAIEmbeddings( + model="text-embedding-ada-002", openai_api_key="test-key" + ) - # Force setup to re-run - LangchainIntegration.setup_once() + # Force setup to re-run + LangchainIntegration.setup_once() - with start_transaction(name="test_input_types"): - # embed_documents takes a list - embeddings.embed_documents(["List item 1", "List item 2", "List item 3"]) - # embed_query takes a string - embeddings.embed_query("Single string query") - - spans = [item.payload for item in items if item.type == "span"] - # Find embeddings spans - embeddings_spans = [ - span - for span in spans - if span["attributes"].get("sentry.op") == "gen_ai.embeddings" - 
] - assert len(embeddings_spans) == 2 + with start_transaction(name="test_input_types"): + # embed_documents takes a list + embeddings.embed_documents( + ["List item 1", "List item 2", "List item 3"] + ) + # embed_query takes a string + embeddings.embed_query("Single string query") + + spans = [item.payload for item in items if item.type == "span"] + # Find embeddings spans + embeddings_spans = [ + span + for span in spans + if span["attributes"].get("sentry.op") == "gen_ai.embeddings" + ] - # Both should have input data captured as lists - for span in embeddings_spans: - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT in span["attributes"] - input_data = span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] - # Input should be normalized to list format - if isinstance(input_data, str): - # If serialized, should contain the input text - assert "List item" in input_data or "Single string query" in input_data, ( - f"Expected input text in serialized data: {input_data}" + assert len(embeddings_spans) == 2 + + # Both should have input data captured as lists + for span in embeddings_spans: + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT in span["attributes"] + input_data = span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + # Input should be normalized to list format + if isinstance(input_data, str): + # If serialized, should contain the input text + assert ( + "List item" in input_data or "Single string query" in input_data + ), f"Expected input text in serialized data: {input_data}" + else: + events = capture_events() + + # Mock the actual API calls + with mock.patch.object( + OpenAIEmbeddings, + "embed_documents", + wraps=lambda self, texts: [[0.1, 0.2, 0.3] for _ in texts], + ), mock.patch.object( + OpenAIEmbeddings, + "embed_query", + wraps=lambda self, text: [0.4, 0.5, 0.6], + ): + embeddings = OpenAIEmbeddings( + model="text-embedding-ada-002", openai_api_key="test-key" ) + # Force setup to re-run + LangchainIntegration.setup_once() + + with 
start_transaction(name="test_input_types"): + # embed_documents takes a list + embeddings.embed_documents( + ["List item 1", "List item 2", "List item 3"] + ) + # embed_query takes a string + embeddings.embed_query("Single string query") + + # Check captured events + assert len(events) >= 1 + tx = events[0] + assert tx["type"] == "transaction" + + # Find embeddings spans + embeddings_spans = [ + span + for span in tx.get("spans", []) + if span.get("op") == "gen_ai.embeddings" + ] + + assert len(embeddings_spans) == 2 + + # Both should have input data captured as lists + for span in embeddings_spans: + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT in span["data"] + input_data = span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + # Input should be normalized to list format + if isinstance(input_data, str): + # If serialized, should contain the input text + assert ( + "List item" in input_data or "Single string query" in input_data + ), f"Expected input text in serialized data: {input_data}" + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( "response_metadata_model,expected_model", [ @@ -2757,16 +4349,18 @@ def test_langchain_embeddings_with_list_and_string_inputs(sentry_init, capture_i ) def test_langchain_response_model_extraction( sentry_init, + capture_events, capture_items, response_metadata_model, expected_model, + stream_gen_ai_spans, ): sentry_init( integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") callback = SentryLangchainCallback(max_span_map_size=100, include_prompts=True) @@ -2774,39 +4368,87 @@ def test_langchain_response_model_extraction( serialized = {"_type": "openai-chat", "model_name": "gpt-3.5-turbo"} prompts = ["Test prompt"] - with start_transaction(): - callback.on_llm_start( - serialized=serialized, - prompts=prompts, - run_id=run_id, - invocation_params={"model": 
"gpt-3.5-turbo"}, - ) + if stream_gen_ai_spans: + items = capture_items("span") - response_metadata = {"model_name": response_metadata_model} - message = AIMessageChunk( - content="Test response", response_metadata=response_metadata - ) + with start_transaction(): + callback.on_llm_start( + serialized=serialized, + prompts=prompts, + run_id=run_id, + invocation_params={"model": "gpt-3.5-turbo"}, + ) + + response_metadata = {"model_name": response_metadata_model} + message = AIMessageChunk( + content="Test response", response_metadata=response_metadata + ) - generation = Mock(text="Test response", message=message) - response = Mock(generations=[[generation]]) - callback.on_llm_end(response=response, run_id=run_id) + generation = Mock(text="Test response", message=message) + response = Mock(generations=[[generation]]) + callback.on_llm_end(response=response, run_id=run_id) - spans = [item.payload for item in items if item.type == "span"] - llm_spans = [ - span - for span in spans - if span["attributes"].get("sentry.op") == "gen_ai.text_completion" - ] - assert len(llm_spans) > 0 + spans = [item.payload for item in items if item.type == "span"] + llm_spans = [ + span + for span in spans + if span["attributes"].get("sentry.op") == "gen_ai.text_completion" + ] + + assert len(llm_spans) > 0 - llm_span = llm_spans[0] - assert llm_span["attributes"]["gen_ai.operation.name"] == "text_completion" + llm_span = llm_spans[0] - if expected_model is not None: - assert SPANDATA.GEN_AI_RESPONSE_MODEL in llm_span["attributes"] - assert llm_span["attributes"][SPANDATA.GEN_AI_RESPONSE_MODEL] == expected_model + assert llm_span["attributes"]["gen_ai.operation.name"] == "text_completion" + + if expected_model is not None: + assert SPANDATA.GEN_AI_RESPONSE_MODEL in llm_span["attributes"] + assert ( + llm_span["attributes"][SPANDATA.GEN_AI_RESPONSE_MODEL] == expected_model + ) + else: + assert SPANDATA.GEN_AI_RESPONSE_MODEL not in llm_span.get("attributes", {}) else: - assert 
SPANDATA.GEN_AI_RESPONSE_MODEL not in llm_span.get("attributes", {}) + events = capture_events() + + with start_transaction(): + callback.on_llm_start( + serialized=serialized, + prompts=prompts, + run_id=run_id, + invocation_params={"model": "gpt-3.5-turbo"}, + ) + + response_metadata = {"model_name": response_metadata_model} + message = AIMessageChunk( + content="Test response", response_metadata=response_metadata + ) + + generation = Mock(text="Test response", message=message) + response = Mock(generations=[[generation]]) + callback.on_llm_end(response=response, run_id=run_id) + + assert len(events) > 0 + tx = events[0] + assert tx["type"] == "transaction" + + llm_spans = [ + span + for span in tx.get("spans", []) + if span.get("op") == "gen_ai.text_completion" + ] + + assert len(llm_spans) > 0 + + llm_span = llm_spans[0] + + assert llm_span["data"]["gen_ai.operation.name"] == "text_completion" + + if expected_model is not None: + assert SPANDATA.GEN_AI_RESPONSE_MODEL in llm_span["data"] + assert llm_span["data"][SPANDATA.GEN_AI_RESPONSE_MODEL] == expected_model + else: + assert SPANDATA.GEN_AI_RESPONSE_MODEL not in llm_span.get("data", {}) # Tests for multimodal content transformation functions @@ -3020,6 +4662,7 @@ def test_transform_google_file_data(self): } +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( "ai_type,expected_system", [ @@ -3065,13 +4708,18 @@ def test_transform_google_file_data(self): ], ) def test_langchain_ai_system_detection( - sentry_init, capture_items, ai_type, expected_system + sentry_init, + capture_events, + capture_items, + ai_type, + expected_system, + stream_gen_ai_spans, ): sentry_init( integrations=[LangchainIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") callback = SentryLangchainCallback(max_span_map_size=100, include_prompts=True) @@ -3079,32 +4727,67 @@ def test_langchain_ai_system_detection( 
serialized = {"_type": ai_type} if ai_type is not None else {} prompts = ["Test prompt"] - with start_transaction(): - callback.on_llm_start( - serialized=serialized, - prompts=prompts, - run_id=run_id, - invocation_params={"_type": ai_type, "model": "test-model"}, - ) + if stream_gen_ai_spans: + items = capture_items("span") - generation = Mock(text="Test response", message=None) - response = Mock(generations=[[generation]]) - callback.on_llm_end(response=response, run_id=run_id) + with start_transaction(): + callback.on_llm_start( + serialized=serialized, + prompts=prompts, + run_id=run_id, + invocation_params={"_type": ai_type, "model": "test-model"}, + ) - spans = [item.payload for item in items if item.type == "span"] - llm_spans = [ - span - for span in spans - if span["attributes"].get("sentry.op") == "gen_ai.text_completion" - ] - assert len(llm_spans) > 0 + generation = Mock(text="Test response", message=None) + response = Mock(generations=[[generation]]) + callback.on_llm_end(response=response, run_id=run_id) + + spans = [item.payload for item in items if item.type == "span"] + llm_spans = [ + span + for span in spans + if span["attributes"].get("sentry.op") == "gen_ai.text_completion" + ] - llm_span = llm_spans[0] + assert len(llm_spans) > 0 + llm_span = llm_spans[0] - if expected_system is not None: - assert llm_span["attributes"][SPANDATA.GEN_AI_SYSTEM] == expected_system + if expected_system is not None: + assert llm_span["attributes"][SPANDATA.GEN_AI_SYSTEM] == expected_system + else: + assert SPANDATA.GEN_AI_SYSTEM not in llm_span.get("attributes", {}) else: - assert SPANDATA.GEN_AI_SYSTEM not in llm_span.get("attributes", {}) + events = capture_events() + + with start_transaction(): + callback.on_llm_start( + serialized=serialized, + prompts=prompts, + run_id=run_id, + invocation_params={"_type": ai_type, "model": "test-model"}, + ) + + generation = Mock(text="Test response", message=None) + response = Mock(generations=[[generation]]) + 
callback.on_llm_end(response=response, run_id=run_id) + + assert len(events) > 0 + tx = events[0] + assert tx["type"] == "transaction" + + llm_spans = [ + span + for span in tx.get("spans", []) + if span.get("op") == "gen_ai.text_completion" + ] + + assert len(llm_spans) > 0 + llm_span = llm_spans[0] + + if expected_system is not None: + assert llm_span["data"][SPANDATA.GEN_AI_SYSTEM] == expected_system + else: + assert SPANDATA.GEN_AI_SYSTEM not in llm_span.get("data", {}) class TestTransformLangchainMessageContent: diff --git a/tests/integrations/langgraph/test_langgraph.py b/tests/integrations/langgraph/test_langgraph.py index e1a3baa0a8..0052fefa29 100644 --- a/tests/integrations/langgraph/test_langgraph.py +++ b/tests/integrations/langgraph/test_langgraph.py @@ -137,6 +137,7 @@ def test_langgraph_integration_init(): assert integration.origin == "auto.ai.langgraph" +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( "send_default_pii, include_prompts", [ @@ -147,53 +148,103 @@ def test_langgraph_integration_init(): ], ) def test_state_graph_compile( - sentry_init, capture_items, send_default_pii, include_prompts + sentry_init, + capture_events, + capture_items, + send_default_pii, + include_prompts, + stream_gen_ai_spans, ): """Test StateGraph.compile() wrapper creates proper create_agent span.""" sentry_init( integrations=[LanggraphIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") + graph = MockStateGraph() def original_compile(self, *args, **kwargs): return MockCompiledGraph(self.name) - with patch("sentry_sdk.integrations.langgraph.StateGraph"): - with start_transaction(): - wrapped_compile = _wrap_state_graph_compile(original_compile) - compiled_graph = wrapped_compile( - graph, model="test-model", checkpointer=None - ) + if stream_gen_ai_spans: + items = 
capture_items("transaction", "span") - assert compiled_graph is not None - assert compiled_graph.name == "test_graph" + with patch("sentry_sdk.integrations.langgraph.StateGraph"): + with start_transaction(): + wrapped_compile = _wrap_state_graph_compile(original_compile) + compiled_graph = wrapped_compile( + graph, model="test-model", checkpointer=None + ) - spans = [item.payload for item in items if item.type == "span"] - agent_spans = [ - span - for span in spans - if span["attributes"]["sentry.op"] == OP.GEN_AI_CREATE_AGENT - ] - assert len(agent_spans) == 1 + assert compiled_graph is not None + assert compiled_graph.name == "test_graph" + + spans = [item.payload for item in items if item.type == "span"] + agent_spans = [ + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_CREATE_AGENT + ] + + assert len(agent_spans) == 1 + agent_span = agent_spans[0] + + assert agent_span["name"] == "create_agent test_graph" + assert agent_span["attributes"]["sentry.origin"] == "auto.ai.langgraph" + assert ( + agent_span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "create_agent" + ) + assert agent_span["attributes"][SPANDATA.GEN_AI_AGENT_NAME] == "test_graph" + assert agent_span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "test-model" + assert SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS in agent_span["attributes"] + + tools_data = agent_span["attributes"][SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS] - agent_span = agent_spans[0] - assert agent_span["name"] == "create_agent test_graph" - assert agent_span["attributes"]["sentry.origin"] == "auto.ai.langgraph" - assert agent_span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "create_agent" - assert agent_span["attributes"][SPANDATA.GEN_AI_AGENT_NAME] == "test_graph" - assert agent_span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "test-model" - assert SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS in agent_span["attributes"] + assert tools_data == ["search_tool", "calculator"] + assert len(tools_data) == 2 
+ assert "search_tool" in tools_data + assert "calculator" in tools_data + else: + events = capture_events() - tools_data = agent_span["attributes"][SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS] - assert tools_data == ["search_tool", "calculator"] - assert len(tools_data) == 2 - assert "search_tool" in tools_data - assert "calculator" in tools_data + with patch("sentry_sdk.integrations.langgraph.StateGraph"): + with start_transaction(): + wrapped_compile = _wrap_state_graph_compile(original_compile) + compiled_graph = wrapped_compile( + graph, model="test-model", checkpointer=None + ) + assert compiled_graph is not None + assert compiled_graph.name == "test_graph" + tx = events[0] + assert tx["type"] == "transaction" + + agent_spans = [ + span for span in tx["spans"] if span["op"] == OP.GEN_AI_CREATE_AGENT + ] + + assert len(agent_spans) == 1 + agent_span = agent_spans[0] + + assert agent_span["description"] == "create_agent test_graph" + assert agent_span["origin"] == "auto.ai.langgraph" + assert agent_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "create_agent" + assert agent_span["data"][SPANDATA.GEN_AI_AGENT_NAME] == "test_graph" + assert agent_span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "test-model" + assert SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS in agent_span["data"] + + tools_data = agent_span["data"][SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS] + + assert tools_data == ["search_tool", "calculator"] + assert len(tools_data) == 2 + assert "search_tool" in tools_data + assert "calculator" in tools_data + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( "send_default_pii, include_prompts", [ @@ -203,14 +254,21 @@ def original_compile(self, *args, **kwargs): (False, False), ], ) -def test_pregel_invoke(sentry_init, capture_items, send_default_pii, include_prompts): +def test_pregel_invoke( + sentry_init, + capture_events, + capture_items, + send_default_pii, + include_prompts, + stream_gen_ai_spans, +): """Test 
Pregel.invoke() wrapper creates proper invoke_agent span.""" sentry_init( integrations=[LanggraphIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") test_state = { "messages": [ @@ -241,61 +299,137 @@ def original_invoke(self, *args, **kwargs): ] return {"messages": new_messages} - with start_transaction(): - wrapped_invoke = _wrap_pregel_invoke(original_invoke) - result = wrapped_invoke(pregel, test_state) + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - assert result is not None + with start_transaction(): + wrapped_invoke = _wrap_pregel_invoke(original_invoke) + result = wrapped_invoke(pregel, test_state) - spans = [item.payload for item in items if item.type == "span"] - invoke_spans = [ - span - for span in spans - if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT - ] - assert len(invoke_spans) == 1 + assert result is not None - invoke_span = invoke_spans[0] - assert invoke_span["name"] == "invoke_agent test_graph" - assert invoke_span["attributes"]["sentry.origin"] == "auto.ai.langgraph" - assert invoke_span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "invoke_agent" - assert invoke_span["attributes"][SPANDATA.GEN_AI_PIPELINE_NAME] == "test_graph" - assert invoke_span["attributes"][SPANDATA.GEN_AI_AGENT_NAME] == "test_graph" + spans = [item.payload for item in items if item.type == "span"] + invoke_spans = [ + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT + ] - if send_default_pii and include_prompts: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["attributes"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT in invoke_span["attributes"] + assert len(invoke_spans) == 1 - request_messages = invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + invoke_span = invoke_spans[0] - if isinstance(request_messages, 
str): - import json + assert invoke_span["name"] == "invoke_agent test_graph" + assert invoke_span["attributes"]["sentry.origin"] == "auto.ai.langgraph" + assert ( + invoke_span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "invoke_agent" + ) + assert invoke_span["attributes"][SPANDATA.GEN_AI_PIPELINE_NAME] == "test_graph" + assert invoke_span["attributes"][SPANDATA.GEN_AI_AGENT_NAME] == "test_graph" - request_messages = json.loads(request_messages) - assert len(request_messages) == 1 - assert request_messages[0]["content"] == "Of course! How can I assist you?" + if send_default_pii and include_prompts: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT in invoke_span["attributes"] - response_text = invoke_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] - assert response_text == expected_assistant_response + request_messages = invoke_span["attributes"][ + SPANDATA.GEN_AI_REQUEST_MESSAGES + ] - assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in invoke_span["attributes"] - tool_calls_data = invoke_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS] - if isinstance(tool_calls_data, str): - import json + if isinstance(request_messages, str): + import json + + request_messages = json.loads(request_messages) + assert len(request_messages) == 1 + assert request_messages[0]["content"] == "Of course! How can I assist you?" 
- tool_calls_data = json.loads(tool_calls_data) + response_text = invoke_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] + assert response_text == expected_assistant_response - assert len(tool_calls_data) == 1 - assert tool_calls_data[0]["id"] == "call_test_123" - assert tool_calls_data[0]["function"]["name"] == "search_tool" + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in invoke_span["attributes"] + tool_calls_data = invoke_span["attributes"][ + SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS + ] + + if isinstance(tool_calls_data, str): + import json + + tool_calls_data = json.loads(tool_calls_data) + + assert len(tool_calls_data) == 1 + assert tool_calls_data[0]["id"] == "call_test_123" + assert tool_calls_data[0]["function"]["name"] == "search_tool" + else: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in invoke_span.get( + "attributes", {} + ) + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in invoke_span.get( + "attributes", {} + ) + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in invoke_span.get( + "attributes", {} + ) else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in invoke_span.get("attributes", {}) - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in invoke_span.get("attributes", {}) - assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in invoke_span.get( - "attributes", {} - ) + events = capture_events() + + with start_transaction(): + wrapped_invoke = _wrap_pregel_invoke(original_invoke) + result = wrapped_invoke(pregel, test_state) + + assert result is not None + tx = events[0] + assert tx["type"] == "transaction" + invoke_spans = [ + span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT + ] + + assert len(invoke_spans) == 1 + + invoke_span = invoke_spans[0] + + assert invoke_span["description"] == "invoke_agent test_graph" + assert invoke_span["origin"] == "auto.ai.langgraph" + assert invoke_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "invoke_agent" + assert invoke_span["data"][SPANDATA.GEN_AI_PIPELINE_NAME] == "test_graph" + assert 
invoke_span["data"][SPANDATA.GEN_AI_AGENT_NAME] == "test_graph" + + if send_default_pii and include_prompts: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT in invoke_span["data"] + + request_messages = invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + + if isinstance(request_messages, str): + import json + + request_messages = json.loads(request_messages) + assert len(request_messages) == 1 + assert request_messages[0]["content"] == "Of course! How can I assist you?" + + response_text = invoke_span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + assert response_text == expected_assistant_response + + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in invoke_span["data"] + tool_calls_data = invoke_span["data"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS] + + if isinstance(tool_calls_data, str): + import json + + tool_calls_data = json.loads(tool_calls_data) + + assert len(tool_calls_data) == 1 + assert tool_calls_data[0]["id"] == "call_test_123" + assert tool_calls_data[0]["function"]["name"] == "search_tool" + else: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in invoke_span.get("data", {}) + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in invoke_span.get("data", {}) + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in invoke_span.get( + "data", {} + ) + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( "send_default_pii, include_prompts", [ @@ -305,14 +439,22 @@ def original_invoke(self, *args, **kwargs): (False, False), ], ) -def test_pregel_ainvoke(sentry_init, capture_items, send_default_pii, include_prompts): +def test_pregel_ainvoke( + sentry_init, + capture_events, + capture_items, + send_default_pii, + include_prompts, + stream_gen_ai_spans, +): """Test Pregel.ainvoke() async wrapper creates proper invoke_agent span.""" sentry_init( integrations=[LanggraphIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + 
_experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") + test_state = {"messages": [MockMessage("What's the weather like?", name="user")]} pregel = MockPregelInstance("async_graph") @@ -342,87 +484,187 @@ async def run_test(): result = await wrapped_ainvoke(pregel, test_state) return result - result = asyncio.run(run_test()) - assert result is not None + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - spans = [item.payload for item in items if item.type == "span"] - invoke_spans = [ - span - for span in spans - if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT - ] - assert len(invoke_spans) == 1 + result = asyncio.run(run_test()) + assert result is not None + + spans = [item.payload for item in items if item.type == "span"] + invoke_spans = [ + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT + ] - invoke_span = invoke_spans[0] - assert invoke_span["name"] == "invoke_agent async_graph" - assert invoke_span["attributes"]["sentry.origin"] == "auto.ai.langgraph" - assert invoke_span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "invoke_agent" - assert invoke_span["attributes"][SPANDATA.GEN_AI_PIPELINE_NAME] == "async_graph" - assert invoke_span["attributes"][SPANDATA.GEN_AI_AGENT_NAME] == "async_graph" + assert len(invoke_spans) == 1 - if send_default_pii and include_prompts: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["attributes"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT in invoke_span["attributes"] + invoke_span = invoke_spans[0] - response_text = invoke_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] - assert response_text == expected_assistant_response + assert invoke_span["name"] == "invoke_agent async_graph" + assert invoke_span["attributes"]["sentry.origin"] == "auto.ai.langgraph" + assert ( + invoke_span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "invoke_agent" + ) + assert 
invoke_span["attributes"][SPANDATA.GEN_AI_PIPELINE_NAME] == "async_graph" + assert invoke_span["attributes"][SPANDATA.GEN_AI_AGENT_NAME] == "async_graph" - assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in invoke_span["attributes"] - tool_calls_data = invoke_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS] - if isinstance(tool_calls_data, str): - import json + if send_default_pii and include_prompts: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT in invoke_span["attributes"] + + response_text = invoke_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] + assert response_text == expected_assistant_response + + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in invoke_span["attributes"] + tool_calls_data = invoke_span["attributes"][ + SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS + ] + + if isinstance(tool_calls_data, str): + import json - tool_calls_data = json.loads(tool_calls_data) + tool_calls_data = json.loads(tool_calls_data) - assert len(tool_calls_data) == 1 - assert tool_calls_data[0]["id"] == "call_weather_456" - assert tool_calls_data[0]["function"]["name"] == "get_weather" + assert len(tool_calls_data) == 1 + assert tool_calls_data[0]["id"] == "call_weather_456" + assert tool_calls_data[0]["function"]["name"] == "get_weather" + else: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in invoke_span.get( + "attributes", {} + ) + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in invoke_span.get( + "attributes", {} + ) + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in invoke_span.get( + "attributes", {} + ) else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in invoke_span.get("attributes", {}) - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in invoke_span.get("attributes", {}) - assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in invoke_span.get( - "attributes", {} - ) + events = capture_events() + + result = asyncio.run(run_test()) + assert result is not None + + tx = events[0] + assert tx["type"] == "transaction" + + 
invoke_spans = [ + span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT + ] + + assert len(invoke_spans) == 1 + + invoke_span = invoke_spans[0] + assert invoke_span["description"] == "invoke_agent async_graph" + assert invoke_span["origin"] == "auto.ai.langgraph" + assert invoke_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "invoke_agent" + assert invoke_span["data"][SPANDATA.GEN_AI_PIPELINE_NAME] == "async_graph" + assert invoke_span["data"][SPANDATA.GEN_AI_AGENT_NAME] == "async_graph" -def test_pregel_invoke_error(sentry_init, capture_items): + if send_default_pii and include_prompts: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT in invoke_span["data"] + + response_text = invoke_span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + assert response_text == expected_assistant_response + + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in invoke_span["data"] + tool_calls_data = invoke_span["data"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS] + + if isinstance(tool_calls_data, str): + import json + + tool_calls_data = json.loads(tool_calls_data) + + assert len(tool_calls_data) == 1 + assert tool_calls_data[0]["id"] == "call_weather_456" + assert tool_calls_data[0]["function"]["name"] == "get_weather" + else: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in invoke_span.get("data", {}) + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in invoke_span.get("data", {}) + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in invoke_span.get( + "data", {} + ) + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_pregel_invoke_error( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """Test error handling during graph execution.""" sentry_init( integrations=[LanggraphIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") + test_state = 
{"messages": [MockMessage("This will fail")]} pregel = MockPregelInstance("error_graph") def original_invoke(self, *args, **kwargs): raise Exception("Graph execution failed") - with start_transaction(), pytest.raises(Exception, match="Graph execution failed"): - wrapped_invoke = _wrap_pregel_invoke(original_invoke) - wrapped_invoke(pregel, test_state) + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - spans = [item.payload for item in items if item.type == "span"] - invoke_spans = [ - span - for span in spans - if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT - ] - assert len(invoke_spans) == 1 + with start_transaction(), pytest.raises( + Exception, match="Graph execution failed" + ): + wrapped_invoke = _wrap_pregel_invoke(original_invoke) + wrapped_invoke(pregel, test_state) + + spans = [item.payload for item in items if item.type == "span"] + invoke_spans = [ + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT + ] + + assert len(invoke_spans) == 1 + + invoke_span = invoke_spans[0] + assert invoke_span.get("status") == "error" + else: + events = capture_events() + + with start_transaction(), pytest.raises( + Exception, match="Graph execution failed" + ): + wrapped_invoke = _wrap_pregel_invoke(original_invoke) + wrapped_invoke(pregel, test_state) + + tx = events[0] + invoke_spans = [ + span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT + ] + + assert len(invoke_spans) == 1 - invoke_span = invoke_spans[0] - assert invoke_span.get("status") == "error" + invoke_span = invoke_spans[0] + assert invoke_span.get("status") == "internal_error" + assert invoke_span.get("tags", {}).get("status") == "internal_error" -def test_pregel_ainvoke_error(sentry_init, capture_items): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_pregel_ainvoke_error( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """Test error handling during async graph 
execution.""" sentry_init( integrations=[LanggraphIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") + test_state = {"messages": [MockMessage("This will fail async")]} pregel = MockPregelInstance("async_error_graph") @@ -436,58 +678,105 @@ async def run_error_test(): wrapped_ainvoke = _wrap_pregel_ainvoke(original_ainvoke) await wrapped_ainvoke(pregel, test_state) - asyncio.run(run_error_test()) + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - spans = [item.payload for item in items if item.type == "span"] - invoke_spans = [ - span - for span in spans - if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT - ] - assert len(invoke_spans) == 1 + asyncio.run(run_error_test()) + + spans = [item.payload for item in items if item.type == "span"] + invoke_spans = [ + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT + ] + + assert len(invoke_spans) == 1 + + invoke_span = invoke_spans[0] + assert invoke_span.get("status") == "error" + else: + events = capture_events() + + asyncio.run(run_error_test()) - invoke_span = invoke_spans[0] - assert invoke_span.get("status") == "error" + tx = events[0] + invoke_spans = [ + span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT + ] + + assert len(invoke_spans) == 1 + + invoke_span = invoke_spans[0] + assert invoke_span.get("status") == "internal_error" + assert invoke_span.get("tags", {}).get("status") == "internal_error" -def test_span_origin(sentry_init, capture_items): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_span_origin( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """Test that span origins are correctly set.""" sentry_init( integrations=[LanggraphIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": 
stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") graph = MockStateGraph() def original_compile(self, *args, **kwargs): return MockCompiledGraph(self.name) - with start_transaction(): - from sentry_sdk.integrations.langgraph import _wrap_state_graph_compile + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with start_transaction(): + from sentry_sdk.integrations.langgraph import _wrap_state_graph_compile + + wrapped_compile = _wrap_state_graph_compile(original_compile) + wrapped_compile(graph) + + tx = next(item.payload for item in items if item.type == "transaction") + assert tx["contexts"]["trace"]["origin"] == "manual" + + spans = [item.payload for item in items if item.type == "span"] + for span in spans: + assert span["attributes"]["sentry.origin"] == "auto.ai.langgraph" + else: + events = capture_events() + + with start_transaction(): + from sentry_sdk.integrations.langgraph import _wrap_state_graph_compile - wrapped_compile = _wrap_state_graph_compile(original_compile) - wrapped_compile(graph) + wrapped_compile = _wrap_state_graph_compile(original_compile) + wrapped_compile(graph) - tx = next(item.payload for item in items if item.type == "transaction") - assert tx["contexts"]["trace"]["origin"] == "manual" + tx = events[0] + assert tx["contexts"]["trace"]["origin"] == "manual" - spans = [item.payload for item in items if item.type == "span"] - for span in spans: - assert span["attributes"]["sentry.origin"] == "auto.ai.langgraph" + for span in tx["spans"]: + assert span["origin"] == "auto.ai.langgraph" +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize("graph_name", ["my_graph", None, ""]) def test_pregel_invoke_with_different_graph_names( - sentry_init, capture_items, graph_name + sentry_init, + capture_events, + capture_items, + graph_name, + stream_gen_ai_spans, ): """Test Pregel.invoke() with different graph name scenarios.""" sentry_init( 
integrations=[LanggraphIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") pregel = MockPregelInstance(graph_name) if graph_name else MockPregelInstance() if not graph_name: @@ -497,31 +786,69 @@ def test_pregel_invoke_with_different_graph_names( def original_invoke(self, *args, **kwargs): return {"result": "test"} - with start_transaction(): - wrapped_invoke = _wrap_pregel_invoke(original_invoke) - wrapped_invoke(pregel, {"messages": []}) + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - spans = [item.payload for item in items if item.type == "span"] - invoke_spans = [ - span - for span in spans - if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT - ] - assert len(invoke_spans) == 1 + with start_transaction(): + wrapped_invoke = _wrap_pregel_invoke(original_invoke) + wrapped_invoke(pregel, {"messages": []}) + + spans = [item.payload for item in items if item.type == "span"] + invoke_spans = [ + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT + ] - invoke_span = invoke_spans[0] + assert len(invoke_spans) == 1 - if graph_name and graph_name.strip(): - assert invoke_span["name"] == "invoke_agent my_graph" - assert invoke_span["attributes"][SPANDATA.GEN_AI_PIPELINE_NAME] == graph_name - assert invoke_span["attributes"][SPANDATA.GEN_AI_AGENT_NAME] == graph_name + invoke_span = invoke_spans[0] + + if graph_name and graph_name.strip(): + assert invoke_span["name"] == "invoke_agent my_graph" + assert ( + invoke_span["attributes"][SPANDATA.GEN_AI_PIPELINE_NAME] == graph_name + ) + assert invoke_span["attributes"][SPANDATA.GEN_AI_AGENT_NAME] == graph_name + else: + assert invoke_span["name"] == "invoke_agent" + assert SPANDATA.GEN_AI_PIPELINE_NAME not in invoke_span.get( + "attributes", {} + ) + assert SPANDATA.GEN_AI_AGENT_NAME not in invoke_span.get("attributes", {}) else: - 
assert invoke_span["name"] == "invoke_agent" - assert SPANDATA.GEN_AI_PIPELINE_NAME not in invoke_span.get("attributes", {}) - assert SPANDATA.GEN_AI_AGENT_NAME not in invoke_span.get("attributes", {}) + events = capture_events() + + with start_transaction(): + wrapped_invoke = _wrap_pregel_invoke(original_invoke) + wrapped_invoke(pregel, {"messages": []}) + + tx = events[0] + invoke_spans = [ + span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT + ] + assert len(invoke_spans) == 1 + + invoke_span = invoke_spans[0] + + if graph_name and graph_name.strip(): + assert invoke_span["description"] == "invoke_agent my_graph" + assert invoke_span["data"][SPANDATA.GEN_AI_PIPELINE_NAME] == graph_name + assert invoke_span["data"][SPANDATA.GEN_AI_AGENT_NAME] == graph_name + else: + assert invoke_span["description"] == "invoke_agent" + assert SPANDATA.GEN_AI_PIPELINE_NAME not in invoke_span.get("data", {}) + assert SPANDATA.GEN_AI_AGENT_NAME not in invoke_span.get("data", {}) -def test_pregel_invoke_span_includes_usage_data(sentry_init, capture_items): + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_pregel_invoke_span_includes_usage_data( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """ Test that invoke_agent spans include aggregated usage data from context_wrapper. This verifies the new functionality added to track token usage in invoke_agent spans. 
@@ -529,8 +856,8 @@ def test_pregel_invoke_span_includes_usage_data(sentry_init, capture_items): sentry_init( integrations=[LanggraphIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") test_state = { "messages": [ @@ -569,35 +896,75 @@ def original_invoke(self, *args, **kwargs): ] return {"messages": new_messages} - with start_transaction(): - wrapped_invoke = _wrap_pregel_invoke(original_invoke) - result = wrapped_invoke(pregel, test_state) + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - assert result is not None + with start_transaction(): + wrapped_invoke = _wrap_pregel_invoke(original_invoke) + result = wrapped_invoke(pregel, test_state) - spans = [item.payload for item in items if item.type == "span"] - invoke_spans = [ - span - for span in spans - if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT - ] - assert len(invoke_spans) == 1 + assert result is not None - invoke_agent_span = invoke_spans[0] + spans = [item.payload for item in items if item.type == "span"] + invoke_spans = [ + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT + ] + + assert len(invoke_spans) == 1 + + invoke_agent_span = invoke_spans[0] + + # Verify invoke_agent span has usage data + assert invoke_agent_span["name"] == "invoke_agent test_graph" + assert "gen_ai.usage.input_tokens" in invoke_agent_span["attributes"] + assert "gen_ai.usage.output_tokens" in invoke_agent_span["attributes"] + assert "gen_ai.usage.total_tokens" in invoke_agent_span["attributes"] + + # The usage should match the mock_usage values (aggregated across all calls) + assert invoke_agent_span["attributes"]["gen_ai.usage.input_tokens"] == 10 + assert invoke_agent_span["attributes"]["gen_ai.usage.output_tokens"] == 20 + assert invoke_agent_span["attributes"]["gen_ai.usage.total_tokens"] == 30 + else: + events = capture_events() + + with 
start_transaction(): + wrapped_invoke = _wrap_pregel_invoke(original_invoke) + result = wrapped_invoke(pregel, test_state) + + assert result is not None + + tx = events[0] + assert tx["type"] == "transaction" + + invoke_spans = [ + span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT + ] + + assert len(invoke_spans) == 1 + + invoke_agent_span = invoke_spans[0] - # Verify invoke_agent span has usage data - assert invoke_agent_span["name"] == "invoke_agent test_graph" - assert "gen_ai.usage.input_tokens" in invoke_agent_span["attributes"] - assert "gen_ai.usage.output_tokens" in invoke_agent_span["attributes"] - assert "gen_ai.usage.total_tokens" in invoke_agent_span["attributes"] + # Verify invoke_agent span has usage data + assert invoke_agent_span["description"] == "invoke_agent test_graph" + assert "gen_ai.usage.input_tokens" in invoke_agent_span["data"] + assert "gen_ai.usage.output_tokens" in invoke_agent_span["data"] + assert "gen_ai.usage.total_tokens" in invoke_agent_span["data"] - # The usage should match the mock_usage values (aggregated across all calls) - assert invoke_agent_span["attributes"]["gen_ai.usage.input_tokens"] == 10 - assert invoke_agent_span["attributes"]["gen_ai.usage.output_tokens"] == 20 - assert invoke_agent_span["attributes"]["gen_ai.usage.total_tokens"] == 30 + # The usage should match the mock_usage values (aggregated across all calls) + assert invoke_agent_span["data"]["gen_ai.usage.input_tokens"] == 10 + assert invoke_agent_span["data"]["gen_ai.usage.output_tokens"] == 20 + assert invoke_agent_span["data"]["gen_ai.usage.total_tokens"] == 30 -def test_pregel_ainvoke_span_includes_usage_data(sentry_init, capture_items): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_pregel_ainvoke_span_includes_usage_data( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """ Test that invoke_agent spans include aggregated usage data from context_wrapper. 
This verifies the new functionality added to track token usage in invoke_agent spans. @@ -605,8 +972,8 @@ def test_pregel_ainvoke_span_includes_usage_data(sentry_init, capture_items): sentry_init( integrations=[LanggraphIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") test_state = { "messages": [ @@ -651,32 +1018,69 @@ async def run_test(): result = await wrapped_ainvoke(pregel, test_state) return result - result = asyncio.run(run_test()) - assert result is not None + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - spans = [item.payload for item in items if item.type == "span"] - invoke_spans = [ - span - for span in spans - if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT - ] - assert len(invoke_spans) == 1 + result = asyncio.run(run_test()) + assert result is not None - invoke_agent_span = invoke_spans[0] + spans = [item.payload for item in items if item.type == "span"] + invoke_spans = [ + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT + ] + + assert len(invoke_spans) == 1 + + invoke_agent_span = invoke_spans[0] - # Verify invoke_agent span has usage data - assert invoke_agent_span["name"] == "invoke_agent test_graph" - assert "gen_ai.usage.input_tokens" in invoke_agent_span["attributes"] - assert "gen_ai.usage.output_tokens" in invoke_agent_span["attributes"] - assert "gen_ai.usage.total_tokens" in invoke_agent_span["attributes"] + # Verify invoke_agent span has usage data + assert invoke_agent_span["name"] == "invoke_agent test_graph" + assert "gen_ai.usage.input_tokens" in invoke_agent_span["attributes"] + assert "gen_ai.usage.output_tokens" in invoke_agent_span["attributes"] + assert "gen_ai.usage.total_tokens" in invoke_agent_span["attributes"] + + # The usage should match the mock_usage values (aggregated across all calls) + assert 
invoke_agent_span["attributes"]["gen_ai.usage.input_tokens"] == 10 + assert invoke_agent_span["attributes"]["gen_ai.usage.output_tokens"] == 20 + assert invoke_agent_span["attributes"]["gen_ai.usage.total_tokens"] == 30 + else: + events = capture_events() - # The usage should match the mock_usage values (aggregated across all calls) - assert invoke_agent_span["attributes"]["gen_ai.usage.input_tokens"] == 10 - assert invoke_agent_span["attributes"]["gen_ai.usage.output_tokens"] == 20 - assert invoke_agent_span["attributes"]["gen_ai.usage.total_tokens"] == 30 + result = asyncio.run(run_test()) + assert result is not None + tx = events[0] + assert tx["type"] == "transaction" -def test_pregel_invoke_multiple_llm_calls_aggregate_usage(sentry_init, capture_items): + invoke_spans = [ + span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT + ] + + assert len(invoke_spans) == 1 + + invoke_agent_span = invoke_spans[0] + + # Verify invoke_agent span has usage data + assert invoke_agent_span["description"] == "invoke_agent test_graph" + assert "gen_ai.usage.input_tokens" in invoke_agent_span["data"] + assert "gen_ai.usage.output_tokens" in invoke_agent_span["data"] + assert "gen_ai.usage.total_tokens" in invoke_agent_span["data"] + + # The usage should match the mock_usage values (aggregated across all calls) + assert invoke_agent_span["data"]["gen_ai.usage.input_tokens"] == 10 + assert invoke_agent_span["data"]["gen_ai.usage.output_tokens"] == 20 + assert invoke_agent_span["data"]["gen_ai.usage.total_tokens"] == 30 + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_pregel_invoke_multiple_llm_calls_aggregate_usage( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """ Test that invoke_agent spans show aggregated usage across multiple LLM calls (e.g., when tools are used and multiple API calls are made). 
@@ -684,8 +1088,8 @@ def test_pregel_invoke_multiple_llm_calls_aggregate_usage(sentry_init, capture_i sentry_init( integrations=[LanggraphIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") test_state = { "messages": [ @@ -735,29 +1139,63 @@ def original_invoke(self, *args, **kwargs): ] return {"messages": new_messages} - with start_transaction(): - wrapped_invoke = _wrap_pregel_invoke(original_invoke) - result = wrapped_invoke(pregel, test_state) + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - assert result is not None + with start_transaction(): + wrapped_invoke = _wrap_pregel_invoke(original_invoke) + result = wrapped_invoke(pregel, test_state) - spans = [item.payload for item in items if item.type == "span"] - invoke_spans = [ - span - for span in spans - if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT - ] - assert len(invoke_spans) == 1 - invoke_agent_span = invoke_spans[0] + assert result is not None + + spans = [item.payload for item in items if item.type == "span"] + invoke_spans = [ + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT + ] + + assert len(invoke_spans) == 1 + invoke_agent_span = invoke_spans[0] + + # Verify invoke_agent span has aggregated usage from both API calls + # Total: 10 + 20 = 30 input tokens, 5 + 15 = 20 output tokens, 15 + 35 = 50 total + assert invoke_agent_span["attributes"]["gen_ai.usage.input_tokens"] == 30 + assert invoke_agent_span["attributes"]["gen_ai.usage.output_tokens"] == 20 + assert invoke_agent_span["attributes"]["gen_ai.usage.total_tokens"] == 50 + else: + events = capture_events() + + with start_transaction(): + wrapped_invoke = _wrap_pregel_invoke(original_invoke) + result = wrapped_invoke(pregel, test_state) + + assert result is not None + + tx = events[0] + assert tx["type"] == "transaction" + + invoke_spans = [ + span for span in 
tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT + ] - # Verify invoke_agent span has aggregated usage from both API calls - # Total: 10 + 20 = 30 input tokens, 5 + 15 = 20 output tokens, 15 + 35 = 50 total - assert invoke_agent_span["attributes"]["gen_ai.usage.input_tokens"] == 30 - assert invoke_agent_span["attributes"]["gen_ai.usage.output_tokens"] == 20 - assert invoke_agent_span["attributes"]["gen_ai.usage.total_tokens"] == 50 + assert len(invoke_spans) == 1 + invoke_agent_span = invoke_spans[0] + # Verify invoke_agent span has aggregated usage from both API calls + # Total: 10 + 20 = 30 input tokens, 5 + 15 = 20 output tokens, 15 + 35 = 50 total + assert invoke_agent_span["data"]["gen_ai.usage.input_tokens"] == 30 + assert invoke_agent_span["data"]["gen_ai.usage.output_tokens"] == 20 + assert invoke_agent_span["data"]["gen_ai.usage.total_tokens"] == 50 -def test_pregel_ainvoke_multiple_llm_calls_aggregate_usage(sentry_init, capture_items): + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_pregel_ainvoke_multiple_llm_calls_aggregate_usage( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """ Test that invoke_agent spans show aggregated usage across multiple LLM calls (e.g., when tools are used and multiple API calls are made). 
@@ -765,8 +1203,8 @@ def test_pregel_ainvoke_multiple_llm_calls_aggregate_usage(sentry_init, capture_ sentry_init( integrations=[LanggraphIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") test_state = { "messages": [ @@ -822,26 +1260,57 @@ async def run_test(): result = await wrapped_ainvoke(pregel, test_state) return result - result = asyncio.run(run_test()) - assert result is not None + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - spans = [item.payload for item in items if item.type == "span"] - invoke_spans = [ - span - for span in spans - if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT - ] - assert len(invoke_spans) == 1 - invoke_agent_span = invoke_spans[0] + result = asyncio.run(run_test()) + assert result is not None + + spans = [item.payload for item in items if item.type == "span"] + invoke_spans = [ + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT + ] + + assert len(invoke_spans) == 1 + invoke_agent_span = invoke_spans[0] + + # Verify invoke_agent span has aggregated usage from both API calls + # Total: 10 + 20 = 30 input tokens, 5 + 15 = 20 output tokens, 15 + 35 = 50 total + assert invoke_agent_span["attributes"]["gen_ai.usage.input_tokens"] == 30 + assert invoke_agent_span["attributes"]["gen_ai.usage.output_tokens"] == 20 + assert invoke_agent_span["attributes"]["gen_ai.usage.total_tokens"] == 50 + else: + events = capture_events() + + result = asyncio.run(run_test()) + assert result is not None + + tx = events[0] + assert tx["type"] == "transaction" - # Verify invoke_agent span has aggregated usage from both API calls - # Total: 10 + 20 = 30 input tokens, 5 + 15 = 20 output tokens, 15 + 35 = 50 total - assert invoke_agent_span["attributes"]["gen_ai.usage.input_tokens"] == 30 - assert invoke_agent_span["attributes"]["gen_ai.usage.output_tokens"] == 20 - assert 
invoke_agent_span["attributes"]["gen_ai.usage.total_tokens"] == 50 + invoke_spans = [ + span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT + ] + + assert len(invoke_spans) == 1 + invoke_agent_span = invoke_spans[0] + + # Verify invoke_agent span has aggregated usage from both API calls + # Total: 10 + 20 = 30 input tokens, 5 + 15 = 20 output tokens, 15 + 35 = 50 total + assert invoke_agent_span["data"]["gen_ai.usage.input_tokens"] == 30 + assert invoke_agent_span["data"]["gen_ai.usage.output_tokens"] == 20 + assert invoke_agent_span["data"]["gen_ai.usage.total_tokens"] == 50 -def test_pregel_invoke_span_includes_response_model(sentry_init, capture_items): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_pregel_invoke_span_includes_response_model( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """ Test that invoke_agent spans include the response model. When an agent makes multiple LLM calls, it should report the last model used. 
@@ -849,8 +1318,8 @@ def test_pregel_invoke_span_includes_response_model(sentry_init, capture_items): sentry_init( integrations=[LanggraphIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") test_state = { "messages": [ @@ -889,31 +1358,68 @@ def original_invoke(self, *args, **kwargs): ] return {"messages": new_messages} - with start_transaction(): - wrapped_invoke = _wrap_pregel_invoke(original_invoke) - result = wrapped_invoke(pregel, test_state) + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - assert result is not None + with start_transaction(): + wrapped_invoke = _wrap_pregel_invoke(original_invoke) + result = wrapped_invoke(pregel, test_state) - spans = [item.payload for item in items if item.type == "span"] - invoke_spans = [ - span - for span in spans - if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT - ] - assert len(invoke_spans) == 1 + assert result is not None - invoke_agent_span = invoke_spans[0] + spans = [item.payload for item in items if item.type == "span"] + invoke_spans = [ + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT + ] - # Verify invoke_agent span has response model - assert invoke_agent_span["name"] == "invoke_agent test_graph" - assert "gen_ai.response.model" in invoke_agent_span["attributes"] - assert ( - invoke_agent_span["attributes"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" - ) + assert len(invoke_spans) == 1 + + invoke_agent_span = invoke_spans[0] + + # Verify invoke_agent span has response model + assert invoke_agent_span["name"] == "invoke_agent test_graph" + assert "gen_ai.response.model" in invoke_agent_span["attributes"] + assert ( + invoke_agent_span["attributes"]["gen_ai.response.model"] + == "gpt-4.1-2025-04-14" + ) + else: + events = capture_events() + + with start_transaction(): + wrapped_invoke = _wrap_pregel_invoke(original_invoke) + result 
= wrapped_invoke(pregel, test_state) + + assert result is not None + + tx = events[0] + assert tx["type"] == "transaction" + + invoke_spans = [ + span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT + ] + + assert len(invoke_spans) == 1 + + invoke_agent_span = invoke_spans[0] + + # Verify invoke_agent span has response model + assert invoke_agent_span["description"] == "invoke_agent test_graph" + assert "gen_ai.response.model" in invoke_agent_span["data"] + assert ( + invoke_agent_span["data"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" + ) -def test_pregel_ainvoke_span_includes_response_model(sentry_init, capture_items): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_pregel_ainvoke_span_includes_response_model( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """ Test that invoke_agent spans include the response model. When an agent makes multiple LLM calls, it should report the last model used. @@ -921,8 +1427,8 @@ def test_pregel_ainvoke_span_includes_response_model(sentry_init, capture_items) sentry_init( integrations=[LanggraphIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") test_state = { "messages": [ @@ -967,28 +1473,62 @@ async def run_test(): result = await wrapped_ainvoke(pregel, test_state) return result - result = asyncio.run(run_test()) - assert result is not None + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - spans = [item.payload for item in items if item.type == "span"] - invoke_spans = [ - span - for span in spans - if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT - ] - assert len(invoke_spans) == 1 + result = asyncio.run(run_test()) + assert result is not None - invoke_agent_span = invoke_spans[0] + spans = [item.payload for item in items if item.type == "span"] + invoke_spans = [ + span + for span in spans + if 
span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT + ] - # Verify invoke_agent span has response model - assert invoke_agent_span["name"] == "invoke_agent test_graph" - assert "gen_ai.response.model" in invoke_agent_span["attributes"] - assert ( - invoke_agent_span["attributes"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" - ) + assert len(invoke_spans) == 1 + + invoke_agent_span = invoke_spans[0] + + # Verify invoke_agent span has response model + assert invoke_agent_span["name"] == "invoke_agent test_graph" + assert "gen_ai.response.model" in invoke_agent_span["attributes"] + assert ( + invoke_agent_span["attributes"]["gen_ai.response.model"] + == "gpt-4.1-2025-04-14" + ) + else: + events = capture_events() + + result = asyncio.run(run_test()) + assert result is not None + tx = events[0] + assert tx["type"] == "transaction" -def test_pregel_invoke_span_uses_last_response_model(sentry_init, capture_items): + invoke_spans = [ + span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT + ] + + assert len(invoke_spans) == 1 + + invoke_agent_span = invoke_spans[0] + + # Verify invoke_agent span has response model + assert invoke_agent_span["description"] == "invoke_agent test_graph" + assert "gen_ai.response.model" in invoke_agent_span["data"] + assert ( + invoke_agent_span["data"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" + ) + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_pregel_invoke_span_uses_last_response_model( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """ Test that when an agent makes multiple LLM calls (e.g., with tools), the invoke_agent span reports the last response model used. 
@@ -996,8 +1536,8 @@ def test_pregel_invoke_span_uses_last_response_model(sentry_init, capture_items) sentry_init( integrations=[LanggraphIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") test_state = { "messages": [ @@ -1049,30 +1589,66 @@ def original_invoke(self, *args, **kwargs): ] return {"messages": new_messages} - with start_transaction(): - wrapped_invoke = _wrap_pregel_invoke(original_invoke) - result = wrapped_invoke(pregel, test_state) + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - assert result is not None + with start_transaction(): + wrapped_invoke = _wrap_pregel_invoke(original_invoke) + result = wrapped_invoke(pregel, test_state) - spans = [item.payload for item in items if item.type == "span"] - invoke_spans = [ - span - for span in spans - if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT - ] - assert len(invoke_spans) == 1 + assert result is not None - invoke_agent_span = invoke_spans[0] + spans = [item.payload for item in items if item.type == "span"] + invoke_spans = [ + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT + ] - # Verify invoke_agent span uses the LAST response model - assert "gen_ai.response.model" in invoke_agent_span["attributes"] - assert ( - invoke_agent_span["attributes"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" - ) + assert len(invoke_spans) == 1 + + invoke_agent_span = invoke_spans[0] + + # Verify invoke_agent span uses the LAST response model + assert "gen_ai.response.model" in invoke_agent_span["attributes"] + assert ( + invoke_agent_span["attributes"]["gen_ai.response.model"] + == "gpt-4.1-2025-04-14" + ) + else: + events = capture_events() + + with start_transaction(): + wrapped_invoke = _wrap_pregel_invoke(original_invoke) + result = wrapped_invoke(pregel, test_state) + + assert result is not None + + tx = events[0] + assert tx["type"] 
== "transaction" + + invoke_spans = [ + span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT + ] + + assert len(invoke_spans) == 1 + + invoke_agent_span = invoke_spans[0] + + # Verify invoke_agent span uses the LAST response model + assert "gen_ai.response.model" in invoke_agent_span["data"] + assert ( + invoke_agent_span["data"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" + ) -def test_pregel_ainvoke_span_uses_last_response_model(sentry_init, capture_items): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_pregel_ainvoke_span_uses_last_response_model( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """ Test that when an agent makes multiple LLM calls (e.g., with tools), the invoke_agent span reports the last response model used. @@ -1080,8 +1656,8 @@ def test_pregel_ainvoke_span_uses_last_response_model(sentry_init, capture_items sentry_init( integrations=[LanggraphIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") test_state = { "messages": [ @@ -1139,24 +1715,51 @@ async def run_test(): result = await wrapped_ainvoke(pregel, test_state) return result - result = asyncio.run(run_test()) - assert result is not None + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - spans = [item.payload for item in items if item.type == "span"] - invoke_spans = [ - span - for span in spans - if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT - ] - assert len(invoke_spans) == 1 + result = asyncio.run(run_test()) + assert result is not None - invoke_agent_span = invoke_spans[0] + spans = [item.payload for item in items if item.type == "span"] + invoke_spans = [ + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT + ] - # Verify invoke_agent span uses the LAST response model - assert "gen_ai.response.model" in invoke_agent_span["attributes"] - 
assert ( - invoke_agent_span["attributes"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" - ) + assert len(invoke_spans) == 1 + + invoke_agent_span = invoke_spans[0] + + # Verify invoke_agent span uses the LAST response model + assert "gen_ai.response.model" in invoke_agent_span["attributes"] + assert ( + invoke_agent_span["attributes"]["gen_ai.response.model"] + == "gpt-4.1-2025-04-14" + ) + else: + events = capture_events() + + result = asyncio.run(run_test()) + assert result is not None + + tx = events[0] + assert tx["type"] == "transaction" + + invoke_spans = [ + span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT + ] + + assert len(invoke_spans) == 1 + + invoke_agent_span = invoke_spans[0] + + # Verify invoke_agent span uses the LAST response model + assert "gen_ai.response.model" in invoke_agent_span["data"] + assert ( + invoke_agent_span["data"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" + ) def test_complex_message_parsing(): @@ -1206,14 +1809,20 @@ def test_complex_message_parsing(): assert result[2]["function_call"]["name"] == "search" -def test_extraction_functions_complex_scenario(sentry_init, capture_items): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_extraction_functions_complex_scenario( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """Test extraction functions with complex scenarios including multiple messages and edge cases.""" sentry_init( integrations=[LanggraphIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") pregel = MockPregelInstance("complex_graph") test_state = {"messages": [MockMessage("Complex request", name="user")]} @@ -1248,29 +1857,59 @@ def original_invoke(self, *args, **kwargs): ] return {"messages": new_messages} - with start_transaction(): - wrapped_invoke = _wrap_pregel_invoke(original_invoke) - result = 
wrapped_invoke(pregel, test_state) + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - assert result is not None + with start_transaction(): + wrapped_invoke = _wrap_pregel_invoke(original_invoke) + result = wrapped_invoke(pregel, test_state) - spans = [item.payload for item in items if item.type == "span"] - invoke_spans = [ - span - for span in spans - if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT - ] - assert len(invoke_spans) == 1 + assert result is not None - invoke_span = invoke_spans[0] - assert SPANDATA.GEN_AI_RESPONSE_TEXT in invoke_span["attributes"] - response_text = invoke_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] - assert response_text == "Final response" + spans = [item.payload for item in items if item.type == "span"] + invoke_spans = [ + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT + ] - assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in invoke_span["attributes"] - import json + assert len(invoke_spans) == 1 + + invoke_span = invoke_spans[0] + assert SPANDATA.GEN_AI_RESPONSE_TEXT in invoke_span["attributes"] + response_text = invoke_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] + assert response_text == "Final response" + + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in invoke_span["attributes"] + import json + + tool_calls_data = invoke_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS] + else: + events = capture_events() + + with start_transaction(): + wrapped_invoke = _wrap_pregel_invoke(original_invoke) + result = wrapped_invoke(pregel, test_state) + + assert result is not None + + tx = events[0] + invoke_spans = [ + span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT + ] + + assert len(invoke_spans) == 1 + + invoke_span = invoke_spans[0] + assert SPANDATA.GEN_AI_RESPONSE_TEXT in invoke_span["data"] + response_text = invoke_span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + assert response_text == "Final response" + + assert 
SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in invoke_span["data"] + import json + + tool_calls_data = invoke_span["data"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS] - tool_calls_data = invoke_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS] if isinstance(tool_calls_data, str): tool_calls_data = json.loads(tool_calls_data) @@ -1281,14 +1920,20 @@ def original_invoke(self, *args, **kwargs): assert tool_calls_data[1]["function"]["name"] == "calculate" -def test_langgraph_message_role_mapping(sentry_init, capture_items): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_langgraph_message_role_mapping( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """Test that Langgraph integration properly maps message roles like 'ai' to 'assistant'""" sentry_init( integrations=[LanggraphIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") # Mock a langgraph message with mixed roles class MockMessage: @@ -1309,51 +1954,83 @@ def __init__(self, content, message_type="human"): compiled_graph = MockCompiledGraph("test_graph") pregel = MockPregelInstance(compiled_graph) - with start_transaction(name="langgraph tx"): - # Use the wrapped invoke function directly - from sentry_sdk.integrations.langgraph import _wrap_pregel_invoke + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - wrapped_invoke = _wrap_pregel_invoke( - lambda self, state_data: {"result": "success"} - ) - wrapped_invoke(pregel, state_data) + with start_transaction(name="langgraph tx"): + # Use the wrapped invoke function directly + from sentry_sdk.integrations.langgraph import _wrap_pregel_invoke - span = next(item.payload for item in items if item.type == "span") + wrapped_invoke = _wrap_pregel_invoke( + lambda self, state_data: {"result": "success"} + ) + wrapped_invoke(pregel, state_data) - # Verify that the span 
was created correctly - assert span["attributes"]["sentry.op"] == "gen_ai.invoke_agent" + span = next(item.payload for item in items if item.type == "span") - # If messages were captured, verify role mapping - if SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"]: - import json + # Verify that the span was created correctly + assert span["attributes"]["sentry.op"] == "gen_ai.invoke_agent" - stored_messages = json.loads( - span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - ) + # If messages were captured, verify role mapping + if SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"]: + import json - # Find messages with specific content to verify role mapping - ai_message = next( - (msg for msg in stored_messages if msg.get("content") == "Hi there!"), None - ) - assistant_message = next( - (msg for msg in stored_messages if msg.get("content") == "How can I help?"), - None, - ) + stored_messages = json.loads( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) + else: + events = capture_events() + + with start_transaction(name="langgraph tx"): + # Use the wrapped invoke function directly + from sentry_sdk.integrations.langgraph import _wrap_pregel_invoke + + wrapped_invoke = _wrap_pregel_invoke( + lambda self, state_data: {"result": "success"} + ) + wrapped_invoke(pregel, state_data) + + (event,) = events + span = event["spans"][0] + + # Verify that the span was created correctly + assert span["op"] == "gen_ai.invoke_agent" + + # If messages were captured, verify role mapping + if SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"]: + import json + + stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + + # Find messages with specific content to verify role mapping + ai_message = next( + (msg for msg in stored_messages if msg.get("content") == "Hi there!"), None + ) + assistant_message = next( + (msg for msg in stored_messages if msg.get("content") == "How can I help?"), + None, + ) - if ai_message: - # "ai" should have been 
mapped to "assistant" - assert ai_message["role"] == "assistant" + if ai_message: + # "ai" should have been mapped to "assistant" + assert ai_message["role"] == "assistant" - if assistant_message: - # "assistant" should stay "assistant" - assert assistant_message["role"] == "assistant" + if assistant_message: + # "assistant" should stay "assistant" + assert assistant_message["role"] == "assistant" - # Verify no "ai" roles remain - roles = [msg["role"] for msg in stored_messages if "role" in msg] - assert "ai" not in roles + # Verify no "ai" roles remain + roles = [msg["role"] for msg in stored_messages if "role" in msg] + assert "ai" not in roles -def test_langgraph_message_truncation(sentry_init, capture_items): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_langgraph_message_truncation( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """Test that large messages are truncated properly in Langgraph integration.""" import json @@ -1361,8 +2038,8 @@ def test_langgraph_message_truncation(sentry_init, capture_items): integrations=[LanggraphIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") large_content = ( "This is a very long message that will exceed our size limits. 
" * 1000 @@ -1382,30 +2059,66 @@ def test_langgraph_message_truncation(sentry_init, capture_items): def original_invoke(self, *args, **kwargs): return {"messages": args[0].get("messages", [])} - with start_transaction(): - wrapped_invoke = _wrap_pregel_invoke(original_invoke) - result = wrapped_invoke(pregel, test_state) + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - assert result is not None + with start_transaction(): + wrapped_invoke = _wrap_pregel_invoke(original_invoke) + result = wrapped_invoke(pregel, test_state) - spans = [item.payload for item in items if item.type == "span"] - invoke_spans = [ - span - for span in spans - if span["attributes"].get("sentry.op") == OP.GEN_AI_INVOKE_AGENT - ] - assert len(invoke_spans) > 0 + assert result is not None + + spans = [item.payload for item in items if item.type == "span"] + invoke_spans = [ + span + for span in spans + if span["attributes"].get("sentry.op") == OP.GEN_AI_INVOKE_AGENT + ] + + assert len(invoke_spans) > 0 + + invoke_span = invoke_spans[0] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["attributes"] + + messages_data = invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + + assert isinstance(messages_data, str) + + parsed_messages = json.loads(messages_data) + assert isinstance(parsed_messages, list) + assert len(parsed_messages) == 1 + assert "small message 5" in str(parsed_messages[0]) + (tx,) = (item.payload for item in items if item.type == "transaction") + else: + events = capture_events() + + with start_transaction(): + wrapped_invoke = _wrap_pregel_invoke(original_invoke) + result = wrapped_invoke(pregel, test_state) + + assert result is not None + + assert len(events) > 0 + tx = events[0] + assert tx["type"] == "transaction" + + invoke_spans = [ + span + for span in tx.get("spans", []) + if span.get("op") == OP.GEN_AI_INVOKE_AGENT + ] + + assert len(invoke_spans) > 0 - invoke_span = invoke_spans[0] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in 
invoke_span["attributes"] + invoke_span = invoke_spans[0] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["data"] - messages_data = invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert isinstance(messages_data, str) + messages_data = invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert isinstance(messages_data, str) - parsed_messages = json.loads(messages_data) - assert isinstance(parsed_messages, list) - assert len(parsed_messages) == 1 - assert "small message 5" in str(parsed_messages[0]) + parsed_messages = json.loads(messages_data) + assert isinstance(parsed_messages, list) + assert len(parsed_messages) == 1 + assert "small message 5" in str(parsed_messages[0]) - (tx,) = (item.payload for item in items if item.type == "transaction") assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5 diff --git a/tests/integrations/litellm/test_litellm.py b/tests/integrations/litellm/test_litellm.py index eb00f7838a..ab60779ed6 100644 --- a/tests/integrations/litellm/test_litellm.py +++ b/tests/integrations/litellm/test_litellm.py @@ -131,6 +131,7 @@ def __init__( self.created = 1234567890 +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( "send_default_pii, include_prompts", [ @@ -143,18 +144,20 @@ def __init__( def test_nonstreaming_chat_completion( reset_litellm_executor, sentry_init, + capture_events, capture_items, send_default_pii, include_prompts, get_model_response, nonstreaming_chat_completions_model_response, + stream_gen_ai_spans, ): sentry_init( integrations=[LiteLLMIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") messages = [{"role": "user", "content": "Hello!"}] @@ -176,12 +179,14 @@ def test_nonstreaming_chat_completion( request_headers={"X-Stainless-Raw-Response": "true"}, ) - with 
mock.patch.object( - client.completions._client._client, - "send", - return_value=model_response, - ): - with start_transaction(name="litellm test"): + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with mock.patch.object( + client.completions._client._client, + "send", + return_value=model_response, + ), start_transaction(name="litellm test"): litellm.completion( model="gpt-3.5-turbo", messages=messages, @@ -190,38 +195,86 @@ def test_nonstreaming_chat_completion( litellm_utils.executor.shutdown(wait=True) - (event,) = (item.payload for item in items if item.type == "transaction") - assert event["transaction"] == "litellm test" + (event,) = (item.payload for item in items if item.type == "transaction") + assert event["transaction"] == "litellm test" - spans = [item.payload for item in items if item.type == "span"] - chat_spans = list( - x - for x in spans - if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - and x["attributes"]["sentry.origin"] == "auto.ai.litellm" - ) - assert len(chat_spans) == 1 - span = chat_spans[0] + spans = [item.payload for item in items if item.type == "span"] + chat_spans = list( + x + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" + ) + assert len(chat_spans) == 1 + span = chat_spans[0] + + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat gpt-3.5-turbo" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "gpt-3.5-turbo" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "gpt-3.5-turbo" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + + if send_default_pii and include_prompts: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT in span["attributes"] + else: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert 
SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] + + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + else: + events = capture_events() - assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - assert span["name"] == "chat gpt-3.5-turbo" - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "gpt-3.5-turbo" - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "gpt-3.5-turbo" - assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + with mock.patch.object( + client.completions._client._client, + "send", + return_value=model_response, + ), start_transaction(name="litellm test"): + litellm.completion( + model="gpt-3.5-turbo", + messages=messages, + client=client, + ) - if send_default_pii and include_prompts: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT in span["attributes"] - else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] + litellm_utils.executor.shutdown(wait=True) + + assert len(events) == 1 + (event,) = events - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert event["type"] == "transaction" + assert event["transaction"] == "litellm test" + chat_spans = list( + x + for x in event["spans"] + if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" + ) + assert len(chat_spans) == 1 + span = chat_spans[0] + + assert span["op"] == OP.GEN_AI_CHAT + assert span["description"] == "chat gpt-3.5-turbo" + assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "gpt-3.5-turbo" + assert 
span["data"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "gpt-3.5-turbo" + assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + + if send_default_pii and include_prompts: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT in span["data"] + else: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio(loop_scope="session") @pytest.mark.parametrize( "send_default_pii, include_prompts", @@ -234,18 +287,20 @@ def test_nonstreaming_chat_completion( ) async def test_async_nonstreaming_chat_completion( sentry_init, + capture_events, capture_items, send_default_pii, include_prompts, get_model_response, nonstreaming_chat_completions_model_response, + stream_gen_ai_spans, ): sentry_init( integrations=[LiteLLMIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") messages = [{"role": "user", "content": "Hello!"}] @@ -267,53 +322,91 @@ async def test_async_nonstreaming_chat_completion( request_headers={"X-Stainless-Raw-Response": "true"}, ) + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + else: + events = capture_events() + with mock.patch.object( client.completions._client._client, "send", return_value=model_response, - ): - with start_transaction(name="litellm test"): - await litellm.acompletion( - model="gpt-3.5-turbo", - messages=messages, - client=client, - ) + ), start_transaction(name="litellm test"): + await litellm.acompletion( + 
model="gpt-3.5-turbo", + messages=messages, + client=client, + ) - await GLOBAL_LOGGING_WORKER.flush() - await asyncio.sleep(0.5) + await GLOBAL_LOGGING_WORKER.flush() + await asyncio.sleep(0.5) - (event,) = (item.payload for item in items if item.type == "transaction") - assert event["transaction"] == "litellm test" + if stream_gen_ai_spans: + (event,) = (item.payload for item in items if item.type == "transaction") + assert event["transaction"] == "litellm test" - spans = [item.payload for item in items if item.type == "span"] - chat_spans = list( - x - for x in spans - if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - and x["attributes"]["sentry.origin"] == "auto.ai.litellm" - ) - assert len(chat_spans) == 1 - span = chat_spans[0] + spans = [item.payload for item in items if item.type == "span"] + chat_spans = list( + x + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" + ) + assert len(chat_spans) == 1 + span = chat_spans[0] + + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat gpt-3.5-turbo" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "gpt-3.5-turbo" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "gpt-3.5-turbo" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + + if send_default_pii and include_prompts: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT in span["attributes"] + else: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] + + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + else: + assert len(events) == 1 + (event,) = events 
- assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - assert span["name"] == "chat gpt-3.5-turbo" - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "gpt-3.5-turbo" - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "gpt-3.5-turbo" - assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert event["type"] == "transaction" + assert event["transaction"] == "litellm test" - if send_default_pii and include_prompts: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT in span["attributes"] - else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] + chat_spans = list( + x + for x in event["spans"] + if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" + ) + assert len(chat_spans) == 1 + span = chat_spans[0] + + assert span["op"] == OP.GEN_AI_CHAT + assert span["description"] == "chat gpt-3.5-turbo" + assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "gpt-3.5-turbo" + assert span["data"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "gpt-3.5-turbo" + assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + + if send_default_pii and include_prompts: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT in span["data"] + else: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert 
span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( "send_default_pii, include_prompts", [ @@ -326,19 +419,21 @@ async def test_async_nonstreaming_chat_completion( def test_streaming_chat_completion( reset_litellm_executor, sentry_init, + capture_events, capture_items, send_default_pii, include_prompts, get_model_response, server_side_event_chunks, streaming_chat_completions_model_response, + stream_gen_ai_spans, ): sentry_init( integrations=[LiteLLMIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") messages = [{"role": "user", "content": "Hello!"}] @@ -352,12 +447,14 @@ def test_streaming_chat_completion( request_headers={"X-Stainless-Raw-Response": "true"}, ) - with mock.patch.object( - client.completions._client._client, - "send", - return_value=model_response, - ): - with start_transaction(name="litellm test"): + if stream_gen_ai_spans: + items = capture_items("span") + + with mock.patch.object( + client.completions._client._client, + "send", + return_value=model_response, + ), start_transaction(name="litellm test"): response = litellm.completion( model="gpt-3.5-turbo", messages=messages, @@ -369,20 +466,54 @@ def test_streaming_chat_completion( streaming_handler.executor.shutdown(wait=True) - spans = [item.payload for item in items if item.type == "span"] - chat_spans = list( - x - for x in spans - if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - and x["attributes"]["sentry.origin"] == "auto.ai.litellm" - ) - assert len(chat_spans) == 1 - span = chat_spans[0] + spans = [item.payload for item in items if item.type == "span"] + chat_spans = list( + x + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" + ) + assert len(chat_spans) == 1 + span = 
chat_spans[0] + + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + else: + events = capture_events() + + with mock.patch.object( + client.completions._client._client, + "send", + return_value=model_response, + ), start_transaction(name="litellm test"): + response = litellm.completion( + model="gpt-3.5-turbo", + messages=messages, + client=client, + stream=True, + ) + for _ in response: + pass + + streaming_handler.executor.shutdown(wait=True) + + assert len(events) == 1 + (event,) = events + + assert event["type"] == "transaction" + chat_spans = list( + x + for x in event["spans"] + if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" + ) + assert len(chat_spans) == 1 + span = chat_spans[0] - assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert span["op"] == OP.GEN_AI_CHAT + assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio(loop_scope="session") @pytest.mark.parametrize( "send_default_pii, include_prompts", @@ -395,6 +526,7 @@ def test_streaming_chat_completion( ) async def test_async_streaming_chat_completion( sentry_init, + capture_events, capture_items, send_default_pii, include_prompts, @@ -402,13 +534,14 @@ async def test_async_streaming_chat_completion( async_iterator, server_side_event_chunks, streaming_chat_completions_model_response, + stream_gen_ai_spans, ): sentry_init( integrations=[LiteLLMIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") messages = [{"role": "user", "content": "Hello!"}] @@ -424,12 +557,14 @@ async def test_async_streaming_chat_completion( request_headers={"X-Stainless-Raw-Response": "true"}, ) - 
with mock.patch.object( - client.completions._client._client, - "send", - return_value=model_response, - ): - with start_transaction(name="litellm test"): + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with mock.patch.object( + client.completions._client._client, + "send", + return_value=model_response, + ), start_transaction(name="litellm test"): response = await litellm.acompletion( model="gpt-3.5-turbo", messages=messages, @@ -442,26 +577,63 @@ async def test_async_streaming_chat_completion( await GLOBAL_LOGGING_WORKER.flush() await asyncio.sleep(0.5) - spans = [item.payload for item in items if item.type == "span"] - chat_spans = list( - x - for x in spans - if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - and x["attributes"]["sentry.origin"] == "auto.ai.litellm" - ) - assert len(chat_spans) == 1 - span = chat_spans[0] + spans = [item.payload for item in items if item.type == "span"] + chat_spans = list( + x + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" + ) + assert len(chat_spans) == 1 + span = chat_spans[0] + + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + else: + events = capture_events() + + with mock.patch.object( + client.completions._client._client, + "send", + return_value=model_response, + ), start_transaction(name="litellm test"): + response = await litellm.acompletion( + model="gpt-3.5-turbo", + messages=messages, + client=client, + stream=True, + ) + async for _ in response: + pass + + await GLOBAL_LOGGING_WORKER.flush() + await asyncio.sleep(0.5) + + assert len(events) == 1 + (event,) = events + + assert event["type"] == "transaction" + chat_spans = list( + x + for x in event["spans"] + if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" + ) + assert len(chat_spans) == 1 + span = chat_spans[0] - assert span["attributes"]["sentry.op"] == 
OP.GEN_AI_CHAT - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert span["op"] == OP.GEN_AI_CHAT + assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_embeddings_create( sentry_init, + capture_events, capture_items, get_model_response, openai_embedding_model_response, clear_litellm_cache, + stream_gen_ai_spans, ): """ Test that litellm.embedding() calls are properly instrumented. @@ -473,8 +645,8 @@ def test_embeddings_create( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") client = OpenAI(api_key="test-key") @@ -484,53 +656,103 @@ def test_embeddings_create( request_headers={"X-Stainless-Raw-Response": "true"}, ) - with mock.patch.object( - client.embeddings._client._client, - "send", - return_value=model_response, - ): - with start_transaction(name="litellm test"): - response = litellm.embedding( - model="text-embedding-ada-002", - input="Hello, world!", - client=client, + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with mock.patch.object( + client.embeddings._client._client, + "send", + return_value=model_response, + ): + with start_transaction(name="litellm test"): + response = litellm.embedding( + model="text-embedding-ada-002", + input="Hello, world!", + client=client, + ) + # Allow time for callbacks to complete (they may run in separate threads) + time.sleep(0.1) + + # Response is processed by litellm, so just check it exists + assert response is not None + + spans = [item.payload for item in items if item.type == "span"] + spans = list( + x + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) - # Allow time for callbacks to complete (they may run in separate threads) - 
time.sleep(0.1) - # Response is processed by litellm, so just check it exists - assert response is not None + assert len(spans) == 1 + span = spans[0] - spans = [item.payload for item in items if item.type == "span"] - spans = list( - x - for x in spans - if x["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS - and x["attributes"]["sentry.origin"] == "auto.ai.litellm" - ) - assert len(spans) == 1 - span = spans[0] - - assert span["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS - assert span["name"] == "embeddings text-embedding-ada-002" - assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings" - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 5 - assert ( - span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] - == "text-embedding-ada-002" - ) - # Check that embeddings input is captured (it's JSON serialized) - embeddings_input = span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] - assert json.loads(embeddings_input) == ["Hello, world!"] + assert span["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS + assert span["name"] == "embeddings text-embedding-ada-002" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings" + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 5 + assert ( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] + == "text-embedding-ada-002" + ) + # Check that embeddings input is captured (it's JSON serialized) + embeddings_input = span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + + assert json.loads(embeddings_input) == ["Hello, world!"] + else: + events = capture_events() + + with mock.patch.object( + client.embeddings._client._client, + "send", + return_value=model_response, + ): + with start_transaction(name="litellm test"): + response = litellm.embedding( + model="text-embedding-ada-002", + input="Hello, world!", + client=client, + ) + # Allow time for callbacks to complete (they may run in separate threads) + time.sleep(0.1) + + # Response is processed by litellm, 
so just check it exists + assert response is not None + + assert len(events) == 1 + (event,) = events + + assert event["type"] == "transaction" + spans = list( + x + for x in event["spans"] + if x["op"] == OP.GEN_AI_EMBEDDINGS and x["origin"] == "auto.ai.litellm" + ) + assert len(spans) == 1 + span = spans[0] + + assert span["op"] == OP.GEN_AI_EMBEDDINGS + assert span["description"] == "embeddings text-embedding-ada-002" + assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings" + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 5 + assert ( + span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "text-embedding-ada-002" + ) + # Check that embeddings input is captured (it's JSON serialized) + embeddings_input = span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + assert json.loads(embeddings_input) == ["Hello, world!"] +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio(loop_scope="session") async def test_async_embeddings_create( sentry_init, + capture_events, capture_items, get_model_response, openai_embedding_model_response, clear_litellm_cache, + stream_gen_ai_spans, ): """ Test that litellm.embedding() calls are properly instrumented. 
@@ -542,8 +764,8 @@ async def test_async_embeddings_create( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") client = AsyncOpenAI(api_key="test-key") @@ -553,61 +775,112 @@ async def test_async_embeddings_create( request_headers={"X-Stainless-Raw-Response": "true"}, ) - with mock.patch.object( - client.embeddings._client._client, - "send", - return_value=model_response, - ): - with start_transaction(name="litellm test"): - response = await litellm.aembedding( - model="text-embedding-ada-002", - input="Hello, world!", - client=client, + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with mock.patch.object( + client.embeddings._client._client, + "send", + return_value=model_response, + ): + with start_transaction(name="litellm test"): + response = await litellm.aembedding( + model="text-embedding-ada-002", + input="Hello, world!", + client=client, + ) + + await GLOBAL_LOGGING_WORKER.flush() + await asyncio.sleep(0.5) + + # Response is processed by litellm, so just check it exists + assert response is not None + + spans = [item.payload for item in items if item.type == "span"] + spans = list( + x + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) - await GLOBAL_LOGGING_WORKER.flush() - await asyncio.sleep(0.5) + assert len(spans) == 1 + span = spans[0] - # Response is processed by litellm, so just check it exists - assert response is not None + assert span["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS + assert span["name"] == "embeddings text-embedding-ada-002" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings" + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 5 + assert ( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] + == 
"text-embedding-ada-002" + ) + # Check that embeddings input is captured (it's JSON serialized) + embeddings_input = span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] - spans = [item.payload for item in items if item.type == "span"] - spans = list( - x - for x in spans - if x["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS - and x["attributes"]["sentry.origin"] == "auto.ai.litellm" - ) - assert len(spans) == 1 - span = spans[0] - - assert span["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS - assert span["name"] == "embeddings text-embedding-ada-002" - assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings" - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 5 - assert ( - span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] - == "text-embedding-ada-002" - ) - # Check that embeddings input is captured (it's JSON serialized) - embeddings_input = span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] - assert json.loads(embeddings_input) == ["Hello, world!"] + assert json.loads(embeddings_input) == ["Hello, world!"] + else: + events = capture_events() + + with mock.patch.object( + client.embeddings._client._client, + "send", + return_value=model_response, + ): + with start_transaction(name="litellm test"): + response = await litellm.aembedding( + model="text-embedding-ada-002", + input="Hello, world!", + client=client, + ) + + await GLOBAL_LOGGING_WORKER.flush() + await asyncio.sleep(0.5) + + # Response is processed by litellm, so just check it exists + assert response is not None + + assert len(events) == 1 + (event,) = events + + assert event["type"] == "transaction" + spans = list( + x + for x in event["spans"] + if x["op"] == OP.GEN_AI_EMBEDDINGS and x["origin"] == "auto.ai.litellm" + ) + assert len(spans) == 1 + span = spans[0] + + assert span["op"] == OP.GEN_AI_EMBEDDINGS + assert span["description"] == "embeddings text-embedding-ada-002" + assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings" + assert 
span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 5 + assert ( + span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "text-embedding-ada-002" + ) + # Check that embeddings input is captured (it's JSON serialized) + embeddings_input = span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + assert json.loads(embeddings_input) == ["Hello, world!"] +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_embeddings_create_with_list_input( sentry_init, + capture_events, capture_items, get_model_response, openai_embedding_model_response, clear_litellm_cache, + stream_gen_ai_spans, ): """Test embedding with list input.""" sentry_init( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") client = OpenAI(api_key="test-key") @@ -617,59 +890,108 @@ def test_embeddings_create_with_list_input( request_headers={"X-Stainless-Raw-Response": "true"}, ) - with mock.patch.object( - client.embeddings._client._client, - "send", - return_value=model_response, - ): - with start_transaction(name="litellm test"): - response = litellm.embedding( - model="text-embedding-ada-002", - input=["First text", "Second text", "Third text"], - client=client, + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with mock.patch.object( + client.embeddings._client._client, + "send", + return_value=model_response, + ): + with start_transaction(name="litellm test"): + response = litellm.embedding( + model="text-embedding-ada-002", + input=["First text", "Second text", "Third text"], + client=client, + ) + # Allow time for callbacks to complete (they may run in separate threads) + time.sleep(0.1) + + # Response is processed by litellm, so just check it exists + assert response is not None + + spans = [item.payload for item in items if item.type == "span"] + spans = list( + x + for x in spans + if 
x["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) - # Allow time for callbacks to complete (they may run in separate threads) - time.sleep(0.1) - # Response is processed by litellm, so just check it exists - assert response is not None + assert len(spans) == 1 + span = spans[0] - spans = [item.payload for item in items if item.type == "span"] - spans = list( - x - for x in spans - if x["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS - and x["attributes"]["sentry.origin"] == "auto.ai.litellm" - ) - assert len(spans) == 1 - span = spans[0] - - assert span["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS - assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings" - # Check that list of embeddings input is captured (it's JSON serialized) - embeddings_input = span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] - assert json.loads(embeddings_input) == [ - "First text", - "Second text", - "Third text", - ] + assert span["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings" + # Check that list of embeddings input is captured (it's JSON serialized) + embeddings_input = span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + + assert json.loads(embeddings_input) == [ + "First text", + "Second text", + "Third text", + ] + else: + events = capture_events() + + with mock.patch.object( + client.embeddings._client._client, + "send", + return_value=model_response, + ): + with start_transaction(name="litellm test"): + response = litellm.embedding( + model="text-embedding-ada-002", + input=["First text", "Second text", "Third text"], + client=client, + ) + # Allow time for callbacks to complete (they may run in separate threads) + time.sleep(0.1) + + # Response is processed by litellm, so just check it exists + assert response is not None + + assert len(events) == 1 + (event,) = events + + assert event["type"] == "transaction" 
+ spans = list( + x + for x in event["spans"] + if x["op"] == OP.GEN_AI_EMBEDDINGS and x["origin"] == "auto.ai.litellm" + ) + assert len(spans) == 1 + span = spans[0] + + assert span["op"] == OP.GEN_AI_EMBEDDINGS + assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings" + # Check that list of embeddings input is captured (it's JSON serialized) + embeddings_input = span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + assert json.loads(embeddings_input) == [ + "First text", + "Second text", + "Third text", + ] +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio(loop_scope="session") async def test_async_embeddings_create_with_list_input( sentry_init, + capture_events, capture_items, get_model_response, openai_embedding_model_response, clear_litellm_cache, + stream_gen_ai_spans, ): """Test embedding with list input.""" sentry_init( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") client = AsyncOpenAI(api_key="test-key") @@ -679,59 +1001,109 @@ async def test_async_embeddings_create_with_list_input( request_headers={"X-Stainless-Raw-Response": "true"}, ) - with mock.patch.object( - client.embeddings._client._client, - "send", - return_value=model_response, - ): - with start_transaction(name="litellm test"): - response = await litellm.aembedding( - model="text-embedding-ada-002", - input=["First text", "Second text", "Third text"], - client=client, + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with mock.patch.object( + client.embeddings._client._client, + "send", + return_value=model_response, + ): + with start_transaction(name="litellm test"): + response = await litellm.aembedding( + model="text-embedding-ada-002", + input=["First text", "Second text", "Third text"], + client=client, + ) + + await GLOBAL_LOGGING_WORKER.flush() + await 
asyncio.sleep(0.5) + + # Response is processed by litellm, so just check it exists + assert response is not None + + spans = [item.payload for item in items if item.type == "span"] + spans = list( + x + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) - await GLOBAL_LOGGING_WORKER.flush() - await asyncio.sleep(0.5) + assert len(spans) == 1 + span = spans[0] - # Response is processed by litellm, so just check it exists - assert response is not None + assert span["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings" + # Check that list of embeddings input is captured (it's JSON serialized) + embeddings_input = span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] - spans = [item.payload for item in items if item.type == "span"] - spans = list( - x - for x in spans - if x["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS - and x["attributes"]["sentry.origin"] == "auto.ai.litellm" - ) - assert len(spans) == 1 - span = spans[0] - - assert span["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS - assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings" - # Check that list of embeddings input is captured (it's JSON serialized) - embeddings_input = span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] - assert json.loads(embeddings_input) == [ - "First text", - "Second text", - "Third text", - ] + assert json.loads(embeddings_input) == [ + "First text", + "Second text", + "Third text", + ] + else: + events = capture_events() + + with mock.patch.object( + client.embeddings._client._client, + "send", + return_value=model_response, + ): + with start_transaction(name="litellm test"): + response = await litellm.aembedding( + model="text-embedding-ada-002", + input=["First text", "Second text", "Third text"], + client=client, + ) + + await GLOBAL_LOGGING_WORKER.flush() + await asyncio.sleep(0.5) + # 
Response is processed by litellm, so just check it exists + assert response is not None + assert len(events) == 1 + (event,) = events + + assert event["type"] == "transaction" + spans = list( + x + for x in event["spans"] + if x["op"] == OP.GEN_AI_EMBEDDINGS and x["origin"] == "auto.ai.litellm" + ) + assert len(spans) == 1 + span = spans[0] + + assert span["op"] == OP.GEN_AI_EMBEDDINGS + assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings" + # Check that list of embeddings input is captured (it's JSON serialized) + embeddings_input = span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + assert json.loads(embeddings_input) == [ + "First text", + "Second text", + "Third text", + ] + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_embeddings_no_pii( sentry_init, + capture_events, capture_items, get_model_response, openai_embedding_model_response, clear_litellm_cache, + stream_gen_ai_spans, ): """Test that PII is not captured when disabled.""" sentry_init( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=False, # PII disabled + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") client = OpenAI(api_key="test-key") @@ -741,53 +1113,95 @@ def test_embeddings_no_pii( request_headers={"X-Stainless-Raw-Response": "true"}, ) - with mock.patch.object( - client.embeddings._client._client, - "send", - return_value=model_response, - ): - with start_transaction(name="litellm test"): - response = litellm.embedding( - model="text-embedding-ada-002", - input="Hello, world!", - client=client, + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with mock.patch.object( + client.embeddings._client._client, + "send", + return_value=model_response, + ): + with start_transaction(name="litellm test"): + response = litellm.embedding( + model="text-embedding-ada-002", + input="Hello, world!", + client=client, + ) + # Allow time 
for callbacks to complete (they may run in separate threads) + time.sleep(0.1) + + # Response is processed by litellm, so just check it exists + assert response is not None + + spans = [item.payload for item in items if item.type == "span"] + spans = list( + x + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) - # Allow time for callbacks to complete (they may run in separate threads) - time.sleep(0.1) - # Response is processed by litellm, so just check it exists - assert response is not None + assert len(spans) == 1 + span = spans[0] - spans = [item.payload for item in items if item.type == "span"] - spans = list( - x - for x in spans - if x["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS - and x["attributes"]["sentry.origin"] == "auto.ai.litellm" - ) - assert len(spans) == 1 - span = spans[0] + assert span["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS + # Check that embeddings input is NOT captured when PII is disabled + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in span["attributes"] + else: + events = capture_events() + + with mock.patch.object( + client.embeddings._client._client, + "send", + return_value=model_response, + ): + with start_transaction(name="litellm test"): + response = litellm.embedding( + model="text-embedding-ada-002", + input="Hello, world!", + client=client, + ) + # Allow time for callbacks to complete (they may run in separate threads) + time.sleep(0.1) + + # Response is processed by litellm, so just check it exists + assert response is not None + + assert len(events) == 1 + (event,) = events - assert span["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS - # Check that embeddings input is NOT captured when PII is disabled - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in span["attributes"] + assert event["type"] == "transaction" + spans = list( + x + for x in event["spans"] + if x["op"] == OP.GEN_AI_EMBEDDINGS and x["origin"] == 
"auto.ai.litellm" + ) + assert len(spans) == 1 + span = spans[0] + + assert span["op"] == OP.GEN_AI_EMBEDDINGS + # Check that embeddings input is NOT captured when PII is disabled + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in span["data"] +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio(loop_scope="session") async def test_async_embeddings_no_pii( sentry_init, + capture_events, capture_items, get_model_response, openai_embedding_model_response, clear_litellm_cache, + stream_gen_ai_spans, ): """Test that PII is not captured when disabled.""" sentry_init( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=False, # PII disabled + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") client = AsyncOpenAI(api_key="test-key") @@ -797,47 +1211,92 @@ async def test_async_embeddings_no_pii( request_headers={"X-Stainless-Raw-Response": "true"}, ) - with mock.patch.object( - client.embeddings._client._client, - "send", - return_value=model_response, - ): - with start_transaction(name="litellm test"): - response = await litellm.aembedding( - model="text-embedding-ada-002", - input="Hello, world!", - client=client, + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with mock.patch.object( + client.embeddings._client._client, + "send", + return_value=model_response, + ): + with start_transaction(name="litellm test"): + response = await litellm.aembedding( + model="text-embedding-ada-002", + input="Hello, world!", + client=client, + ) + + await GLOBAL_LOGGING_WORKER.flush() + await asyncio.sleep(0.5) + + # Response is processed by litellm, so just check it exists + assert response is not None + spans = [item.payload for item in items if item.type == "span"] + spans = list( + x + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) - 
await GLOBAL_LOGGING_WORKER.flush() - await asyncio.sleep(0.5) + assert len(spans) == 1 + span = spans[0] - # Response is processed by litellm, so just check it exists - assert response is not None + assert span["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS + # Check that embeddings input is NOT captured when PII is disabled + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in span["attributes"] + else: + events = capture_events() + + with mock.patch.object( + client.embeddings._client._client, + "send", + return_value=model_response, + ): + with start_transaction(name="litellm test"): + response = await litellm.aembedding( + model="text-embedding-ada-002", + input="Hello, world!", + client=client, + ) - spans = [item.payload for item in items if item.type == "span"] - spans = list( - x - for x in spans - if x["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS - and x["attributes"]["sentry.origin"] == "auto.ai.litellm" - ) - assert len(spans) == 1 - span = spans[0] + await GLOBAL_LOGGING_WORKER.flush() + await asyncio.sleep(0.5) + + # Response is processed by litellm, so just check it exists + assert response is not None + + assert len(events) == 1 + (event,) = events + + assert event["type"] == "transaction" + spans = list( + x + for x in event["spans"] + if x["op"] == OP.GEN_AI_EMBEDDINGS and x["origin"] == "auto.ai.litellm" + ) + assert len(spans) == 1 + span = spans[0] - assert span["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS - # Check that embeddings input is NOT captured when PII is disabled - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in span["attributes"] + assert span["op"] == OP.GEN_AI_EMBEDDINGS + # Check that embeddings input is NOT captured when PII is disabled + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in span["data"] +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_exception_handling( - reset_litellm_executor, sentry_init, capture_items, get_rate_limit_model_response + reset_litellm_executor, + sentry_init, + 
capture_events, + capture_items, + get_rate_limit_model_response, + stream_gen_ai_spans, ): sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("event") messages = [{"role": "user", "content": "Hello!"}] @@ -845,37 +1304,65 @@ def test_exception_handling( model_response = get_rate_limit_model_response() - with mock.patch.object( - client.completions._client._client, - "send", - return_value=model_response, - ): - with start_transaction(name="litellm test"): - with pytest.raises(litellm.RateLimitError): - litellm.completion( - model="gpt-3.5-turbo", - messages=messages, - client=client, - ) + if stream_gen_ai_spans: + items = capture_items("event") - # Find the error event - error_events = [ - item.payload - for item in items - if item.type == "event" and item.payload.get("level") == "error" - ] + with mock.patch.object( + client.completions._client._client, + "send", + return_value=model_response, + ), start_transaction(name="litellm test"), pytest.raises( + litellm.RateLimitError + ): + litellm.completion( + model="gpt-3.5-turbo", + messages=messages, + client=client, + ) + + # Find the error event + error_events = [ + item.payload + for item in items + if item.type == "event" and item.payload.get("level") == "error" + ] + else: + events = capture_events() + + with mock.patch.object( + client.completions._client._client, + "send", + return_value=model_response, + ), start_transaction(name="litellm test"), pytest.raises( + litellm.RateLimitError + ): + litellm.completion( + model="gpt-3.5-turbo", + messages=messages, + client=client, + ) + + # Should have error event and transaction + assert len(events) >= 1 + # Find the error event + error_events = [e for e in events if e.get("level") == "error"] assert len(error_events) == 1 +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio(loop_scope="session") async def 
test_async_exception_handling( - sentry_init, capture_items, get_rate_limit_model_response + sentry_init, + capture_events, + capture_items, + get_rate_limit_model_response, + stream_gen_ai_spans, ): sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("event") messages = [{"role": "user", "content": "Hello!"}] @@ -883,40 +1370,66 @@ async def test_async_exception_handling( model_response = get_rate_limit_model_response() - with mock.patch.object( - client.embeddings._client._client, - "send", - return_value=model_response, - ): - with start_transaction(name="litellm test"): - with pytest.raises(litellm.RateLimitError): - await litellm.acompletion( - model="gpt-3.5-turbo", - messages=messages, - client=client, - ) + if stream_gen_ai_spans: + items = capture_items("event") - # Find the error event - error_events = [ - item.payload - for item in items - if item.type == "event" and item.payload.get("level") == "error" - ] + with mock.patch.object( + client.embeddings._client._client, + "send", + return_value=model_response, + ), start_transaction(name="litellm test"), pytest.raises( + litellm.RateLimitError + ): + await litellm.acompletion( + model="gpt-3.5-turbo", + messages=messages, + client=client, + ) + + # Find the error event + error_events = [ + item.payload + for item in items + if item.type == "event" and item.payload.get("level") == "error" + ] + else: + events = capture_events() + + with mock.patch.object( + client.embeddings._client._client, + "send", + return_value=model_response, + ), start_transaction(name="litellm test"), pytest.raises( + litellm.RateLimitError + ): + await litellm.acompletion( + model="gpt-3.5-turbo", + messages=messages, + client=client, + ) + + # Should have error event and transaction + assert len(events) >= 1 + # Find the error event + error_events = [e for e in events if e.get("level") == "error"] assert len(error_events) 
== 1 +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_span_origin( reset_litellm_executor, sentry_init, + capture_events, capture_items, get_model_response, nonstreaming_chat_completions_model_response, + stream_gen_ai_spans, ): sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") messages = [{"role": "user", "content": "Hello!"}] @@ -938,12 +1451,35 @@ def test_span_origin( request_headers={"X-Stainless-Raw-Response": "true"}, ) - with mock.patch.object( - client.completions._client._client, - "send", - return_value=model_response, - ): - with start_transaction(name="litellm test"): + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with mock.patch.object( + client.completions._client._client, + "send", + return_value=model_response, + ), start_transaction(name="litellm test"): + litellm.completion( + model="gpt-3.5-turbo", + messages=messages, + client=client, + ) + + litellm_utils.executor.shutdown(wait=True) + + (event,) = (item.payload for item in items if item.type == "transaction") + assert event["contexts"]["trace"]["origin"] == "manual" + + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["attributes"]["sentry.origin"] == "auto.ai.litellm" + else: + events = capture_events() + + with mock.patch.object( + client.completions._client._client, + "send", + return_value=model_response, + ), start_transaction(name="litellm test"): litellm.completion( model="gpt-3.5-turbo", messages=messages, @@ -952,28 +1488,30 @@ def test_span_origin( litellm_utils.executor.shutdown(wait=True) - (event,) = (item.payload for item in items if item.type == "transaction") - assert event["contexts"]["trace"]["origin"] == "manual" + (event,) = events - spans = [item.payload for item in items if item.type == "span"] - assert spans[0]["attributes"]["sentry.origin"] == 
"auto.ai.litellm" + assert event["contexts"]["trace"]["origin"] == "manual" + assert event["spans"][0]["origin"] == "auto.ai.litellm" +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_multiple_providers( reset_litellm_executor, sentry_init, + capture_events, capture_items, get_model_response, nonstreaming_chat_completions_model_response, nonstreaming_anthropic_model_response, nonstreaming_google_genai_model_response, + stream_gen_ai_spans, ): """Test that the integration correctly identifies different providers.""" sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction") messages = [{"role": "user", "content": "Hello!"}] @@ -994,12 +1532,14 @@ def test_multiple_providers( request_headers={"X-Stainless-Raw-Response": "true"}, ) - with mock.patch.object( - openai_client.completions._client._client, - "send", - return_value=openai_model_response, - ): - with start_transaction(name="test gpt-3.5-turbo"): + if stream_gen_ai_spans: + items = capture_items("transaction") + + with mock.patch.object( + openai_client.completions._client._client, + "send", + return_value=openai_model_response, + ), start_transaction(name="test gpt-3.5-turbo"): litellm.completion( model="gpt-3.5-turbo", messages=messages, @@ -1008,21 +1548,20 @@ def test_multiple_providers( litellm_utils.executor.shutdown(wait=True) - _reset_litellm_executor() + _reset_litellm_executor() - anthropic_client = HTTPHandler() - anthropic_model_response = get_model_response( - nonstreaming_anthropic_model_response, - serialize_pydantic=True, - request_headers={"X-Stainless-Raw-Response": "true"}, - ) + anthropic_client = HTTPHandler() + anthropic_model_response = get_model_response( + nonstreaming_anthropic_model_response, + serialize_pydantic=True, + request_headers={"X-Stainless-Raw-Response": "true"}, + ) - with mock.patch.object( - anthropic_client, - "post", - 
return_value=anthropic_model_response, - ): - with start_transaction(name="test claude-3-opus-20240229"): + with mock.patch.object( + anthropic_client, + "post", + return_value=anthropic_model_response, + ), start_transaction(name="test claude-3-opus-20240229"): litellm.completion( model="claude-3-opus-20240229", messages=messages, @@ -1032,20 +1571,87 @@ def test_multiple_providers( litellm_utils.executor.shutdown(wait=True) - _reset_litellm_executor() + _reset_litellm_executor() - gemini_client = HTTPHandler() - gemini_model_response = get_model_response( - nonstreaming_google_genai_model_response, - serialize_pydantic=True, - ) + gemini_client = HTTPHandler() + gemini_model_response = get_model_response( + nonstreaming_google_genai_model_response, + serialize_pydantic=True, + ) - with mock.patch.object( - gemini_client, - "post", - return_value=gemini_model_response, - ): - with start_transaction(name="test gemini/gemini-pro"): + with mock.patch.object( + gemini_client, + "post", + return_value=gemini_model_response, + ), start_transaction(name="test gemini/gemini-pro"): + litellm.completion( + model="gemini/gemini-pro", + messages=messages, + client=gemini_client, + api_key="test-key", + ) + + litellm_utils.executor.shutdown(wait=True) + + events = [item.payload for item in items if item.type == "transaction"] + assert len(events) == 3 + + spans = [item.payload for item in items if item.type == "span"] + for span in spans: + # The provider should be detected by litellm.get_llm_provider + assert SPANDATA.GEN_AI_SYSTEM in span["attributes"] + else: + events = capture_events() + + with mock.patch.object( + openai_client.completions._client._client, + "send", + return_value=openai_model_response, + ), start_transaction(name="test gpt-3.5-turbo"): + litellm.completion( + model="gpt-3.5-turbo", + messages=messages, + client=openai_client, + ) + + litellm_utils.executor.shutdown(wait=True) + + _reset_litellm_executor() + + anthropic_client = HTTPHandler() + 
anthropic_model_response = get_model_response( + nonstreaming_anthropic_model_response, + serialize_pydantic=True, + request_headers={"X-Stainless-Raw-Response": "true"}, + ) + + with mock.patch.object( + anthropic_client, + "post", + return_value=anthropic_model_response, + ), start_transaction(name="test claude-3-opus-20240229"): + litellm.completion( + model="claude-3-opus-20240229", + messages=messages, + client=anthropic_client, + api_key="test-key", + ) + + litellm_utils.executor.shutdown(wait=True) + + _reset_litellm_executor() + + gemini_client = HTTPHandler() + gemini_model_response = get_model_response( + nonstreaming_google_genai_model_response, + serialize_pydantic=True, + ) + + with mock.patch.object( + gemini_client, + "post", + return_value=gemini_model_response, + ), start_transaction(name="test gemini/gemini-pro"): litellm.completion( model="gemini/gemini-pro", messages=messages, @@ -1055,30 +1661,32 @@ def test_multiple_providers( litellm_utils.executor.shutdown(wait=True) - events = [item.payload for item in items if item.type == "transaction"] - assert len(events) == 3 + assert len(events) == 3 - spans = [item.payload for item in items if item.type == "span"] - for span in spans: - # The provider should be detected by litellm.get_llm_provider - assert SPANDATA.GEN_AI_SYSTEM in span["attributes"] + for i in range(3): + span = events[i]["spans"][0] + # The provider should be detected by litellm.get_llm_provider + assert SPANDATA.GEN_AI_SYSTEM in span["data"] +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio(loop_scope="session") async def test_async_multiple_providers( sentry_init, + capture_events, capture_items, get_model_response, nonstreaming_chat_completions_model_response, nonstreaming_anthropic_model_response, nonstreaming_google_genai_model_response, + stream_gen_ai_spans, ): """Test that the integration correctly identifies different providers.""" sentry_init( integrations=[LiteLLMIntegration()], 
traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") messages = [{"role": "user", "content": "Hello!"}] @@ -1099,12 +1707,14 @@ async def test_async_multiple_providers( request_headers={"X-Stainless-Raw-Response": "true"}, ) - with mock.patch.object( - openai_client.completions._client._client, - "send", - return_value=openai_model_response, - ): - with start_transaction(name="test gpt-3.5-turbo"): + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with mock.patch.object( + openai_client.completions._client._client, + "send", + return_value=openai_model_response, + ), start_transaction(name="test gpt-3.5-turbo"): await litellm.acompletion( model="gpt-3.5-turbo", messages=messages, @@ -1114,21 +1724,20 @@ async def test_async_multiple_providers( await GLOBAL_LOGGING_WORKER.flush() await asyncio.sleep(0.5) - _reset_litellm_executor() + _reset_litellm_executor() - anthropic_client = AsyncHTTPHandler() - anthropic_model_response = get_model_response( - nonstreaming_anthropic_model_response, - serialize_pydantic=True, - request_headers={"X-Stainless-Raw-Response": "True"}, - ) + anthropic_client = AsyncHTTPHandler() + anthropic_model_response = get_model_response( + nonstreaming_anthropic_model_response, + serialize_pydantic=True, + request_headers={"X-Stainless-Raw-Response": "True"}, + ) - with mock.patch.object( - anthropic_client, - "post", - return_value=anthropic_model_response, - ): - with start_transaction(name="test claude-3-opus-20240229"): + with mock.patch.object( + anthropic_client, + "post", + return_value=anthropic_model_response, + ), start_transaction(name="test claude-3-opus-20240229"): await litellm.acompletion( model="claude-3-opus-20240229", messages=messages, @@ -1139,20 +1748,90 @@ async def test_async_multiple_providers( await GLOBAL_LOGGING_WORKER.flush() await asyncio.sleep(0.5) - _reset_litellm_executor() + _reset_litellm_executor() - 
gemini_client = AsyncHTTPHandler() - gemini_model_response = get_model_response( - nonstreaming_google_genai_model_response, - serialize_pydantic=True, - ) + gemini_client = AsyncHTTPHandler() + gemini_model_response = get_model_response( + nonstreaming_google_genai_model_response, + serialize_pydantic=True, + ) - with mock.patch.object( - gemini_client, - "post", - return_value=gemini_model_response, - ): - with start_transaction(name="test gemini/gemini-pro"): + with mock.patch.object( + gemini_client, + "post", + return_value=gemini_model_response, + ), start_transaction(name="test gemini/gemini-pro"): + await litellm.acompletion( + model="gemini/gemini-pro", + messages=messages, + client=gemini_client, + api_key="test-key", + ) + + await GLOBAL_LOGGING_WORKER.flush() + await asyncio.sleep(0.5) + + events = [item.payload for item in items if item.type == "transaction"] + assert len(events) == 3 + + spans = [item.payload for item in items if item.type == "span"] + for span in spans: + # The provider should be detected by litellm.get_llm_provider + assert SPANDATA.GEN_AI_SYSTEM in span["attributes"] + else: + events = capture_events() + + with mock.patch.object( + openai_client.completions._client._client, + "send", + return_value=openai_model_response, + ), start_transaction(name="test gpt-3.5-turbo"): + await litellm.acompletion( + model="gpt-3.5-turbo", + messages=messages, + client=openai_client, + ) + + await GLOBAL_LOGGING_WORKER.flush() + await asyncio.sleep(0.5) + + _reset_litellm_executor() + + anthropic_client = AsyncHTTPHandler() + anthropic_model_response = get_model_response( + nonstreaming_anthropic_model_response, + serialize_pydantic=True, + request_headers={"X-Stainless-Raw-Response": "True"}, + ) + + with mock.patch.object( + anthropic_client, + "post", + return_value=anthropic_model_response, + ), start_transaction(name="test claude-3-opus-20240229"): + await litellm.acompletion( + model="claude-3-opus-20240229", + messages=messages, + 
client=anthropic_client, + api_key="test-key", + ) + + await GLOBAL_LOGGING_WORKER.flush() + await asyncio.sleep(0.5) + + _reset_litellm_executor() + + gemini_client = AsyncHTTPHandler() + gemini_model_response = get_model_response( + nonstreaming_google_genai_model_response, + serialize_pydantic=True, + ) + + with mock.patch.object( + gemini_client, + "post", + return_value=gemini_model_response, + ), start_transaction(name="test gemini/gemini-pro"): await litellm.acompletion( model="gemini/gemini-pro", messages=messages, @@ -1163,28 +1842,30 @@ async def test_async_multiple_providers( await GLOBAL_LOGGING_WORKER.flush() await asyncio.sleep(0.5) - events = [item.payload for item in items if item.type == "transaction"] - assert len(events) == 3 + assert len(events) == 3 - spans = [item.payload for item in items if item.type == "span"] - for span in spans: - # The provider should be detected by litellm.get_llm_provider - assert SPANDATA.GEN_AI_SYSTEM in span["attributes"] + for i in range(3): + span = events[i]["spans"][0] + # The provider should be detected by litellm.get_llm_provider + assert SPANDATA.GEN_AI_SYSTEM in span["data"] +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_additional_parameters( reset_litellm_executor, sentry_init, + capture_events, capture_items, get_model_response, nonstreaming_chat_completions_model_response, + stream_gen_ai_spans, ): """Test that additional parameters are captured.""" sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") messages = [{"role": "user", "content": "Hello!"}] client = OpenAI(api_key="test-key") @@ -1205,12 +1886,14 @@ def test_additional_parameters( request_headers={"X-Stainless-Raw-Response": "true"}, ) - with mock.patch.object( - client.completions._client._client, - "send", - return_value=model_response, - ): - with start_transaction(name="litellm test"): 
+ if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with mock.patch.object( + client.completions._client._client, + "send", + return_value=model_response, + ), start_transaction(name="litellm test"): litellm.completion( model="gpt-3.5-turbo", messages=messages, @@ -1224,36 +1907,74 @@ def test_additional_parameters( litellm_utils.executor.shutdown(wait=True) - spans = [item.payload for item in items if item.type == "span"] - chat_spans = list( - x - for x in spans - if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - and x["attributes"]["sentry.origin"] == "auto.ai.litellm" - ) - assert len(chat_spans) == 1 - span = chat_spans[0] + spans = [item.payload for item in items if item.type == "span"] + chat_spans = list( + x + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" + ) + assert len(chat_spans) == 1 + span = chat_spans[0] + + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.5 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.5 + else: + events = capture_events() + + with mock.patch.object( + client.completions._client._client, + "send", + return_value=model_response, + ), start_transaction(name="litellm test"): + litellm.completion( + model="gpt-3.5-turbo", + messages=messages, + client=client, + temperature=0.7, + max_tokens=100, + top_p=0.9, + frequency_penalty=0.5, + presence_penalty=0.5, + ) - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.5 - assert 
span["attributes"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.5 + litellm_utils.executor.shutdown(wait=True) + (event,) = events + chat_spans = list( + x + for x in event["spans"] + if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" + ) + assert len(chat_spans) == 1 + span = chat_spans[0] + assert span["data"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert span["data"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["data"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + assert span["data"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.5 + assert span["data"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.5 + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio(loop_scope="session") async def test_async_additional_parameters( sentry_init, + capture_events, capture_items, get_model_response, nonstreaming_chat_completions_model_response, + stream_gen_ai_spans, ): """Test that additional parameters are captured.""" sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") messages = [{"role": "user", "content": "Hello!"}] client = AsyncOpenAI(api_key="test-key") @@ -1274,12 +1995,14 @@ async def test_async_additional_parameters( request_headers={"X-Stainless-Raw-Response": "true"}, ) - with mock.patch.object( - client.completions._client._client, - "send", - return_value=model_response, - ): - with start_transaction(name="litellm test"): + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with mock.patch.object( + client.completions._client._client, + "send", + return_value=model_response, + ), start_transaction(name="litellm test"): await litellm.acompletion( model="gpt-3.5-turbo", messages=messages, @@ -1294,35 +2017,74 @@ async def test_async_additional_parameters( await GLOBAL_LOGGING_WORKER.flush() await asyncio.sleep(0.5) - spans = [item.payload for 
item in items if item.type == "span"] - chat_spans = list( - x - for x in spans - if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - and x["attributes"]["sentry.origin"] == "auto.ai.litellm" - ) - assert len(chat_spans) == 1 - span = chat_spans[0] + spans = [item.payload for item in items if item.type == "span"] + chat_spans = list( + x + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" + ) + assert len(chat_spans) == 1 + span = chat_spans[0] + + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.5 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.5 + else: + events = capture_events() - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.5 - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.5 + with mock.patch.object( + client.completions._client._client, + "send", + return_value=model_response, + ), start_transaction(name="litellm test"): + await litellm.acompletion( + model="gpt-3.5-turbo", + messages=messages, + client=client, + temperature=0.7, + max_tokens=100, + top_p=0.9, + frequency_penalty=0.5, + presence_penalty=0.5, + ) + await GLOBAL_LOGGING_WORKER.flush() + await asyncio.sleep(0.5) + + (event,) = events + chat_spans = list( + x + for x in event["spans"] + if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" + ) + assert len(chat_spans) == 1 + span = chat_spans[0] + assert span["data"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert 
span["data"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["data"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + assert span["data"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.5 + assert span["data"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.5 + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_no_integration( reset_litellm_executor, sentry_init, + capture_events, capture_items, get_model_response, nonstreaming_chat_completions_model_response, + stream_gen_ai_spans, ): """Test that when integration is not enabled, callbacks don't break.""" sentry_init( traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") messages = [{"role": "user", "content": "Hello!"}] client = OpenAI(api_key="test-key") @@ -1343,12 +2105,37 @@ def test_no_integration( request_headers={"X-Stainless-Raw-Response": "true"}, ) - with mock.patch.object( - client.completions._client._client, - "send", - return_value=model_response, - ): - with start_transaction(name="litellm test"): + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with mock.patch.object( + client.completions._client._client, + "send", + return_value=model_response, + ), start_transaction(name="litellm test"): + litellm.completion( + model="gpt-3.5-turbo", + messages=messages, + client=client, + ) + + litellm_utils.executor.shutdown(wait=True) + + spans = [item.payload for item in items if item.type == "span"] + chat_spans = list( + x + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" + ) + else: + events = capture_events() + + with mock.patch.object( + client.completions._client._client, + "send", + return_value=model_response, + ), start_transaction(name="litellm test"): litellm.completion( model="gpt-3.5-turbo", messages=messages, @@ -1357,28 +2144,32 @@ def test_no_integration( 
litellm_utils.executor.shutdown(wait=True) - spans = [item.payload for item in items if item.type == "span"] - chat_spans = list( - x - for x in spans - if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - and x["attributes"]["sentry.origin"] == "auto.ai.litellm" - ) + (event,) = events + # Should still have the transaction, but no child spans since integration is off + assert event["type"] == "transaction" + chat_spans = list( + x + for x in event["spans"] + if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" + ) assert len(chat_spans) == 0 +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio(loop_scope="session") async def test_async_no_integration( sentry_init, + capture_events, capture_items, get_model_response, nonstreaming_chat_completions_model_response, + stream_gen_ai_spans, ): """Test that when integration is not enabled, callbacks don't break.""" sentry_init( traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") messages = [{"role": "user", "content": "Hello!"}] client = AsyncOpenAI(api_key="test-key") @@ -1399,12 +2190,14 @@ async def test_async_no_integration( request_headers={"X-Stainless-Raw-Response": "true"}, ) - with mock.patch.object( - client.completions._client._client, - "send", - return_value=model_response, - ): - with start_transaction(name="litellm test"): + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with mock.patch.object( + client.completions._client._client, + "send", + return_value=model_response, + ), start_transaction(name="litellm test"): await litellm.acompletion( model="gpt-3.5-turbo", messages=messages, @@ -1414,23 +2207,54 @@ async def test_async_no_integration( await GLOBAL_LOGGING_WORKER.flush() await asyncio.sleep(0.5) - spans = [item.payload for item in items if item.type == "span"] - chat_spans = list( - x - for x in spans - if x["attributes"]["sentry.op"] == 
OP.GEN_AI_CHAT - and x["attributes"]["sentry.origin"] == "auto.ai.litellm" - ) + spans = [item.payload for item in items if item.type == "span"] + chat_spans = list( + x + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" + ) + else: + events = capture_events() + + with mock.patch.object( + client.completions._client._client, + "send", + return_value=model_response, + ), start_transaction(name="litellm test"): + await litellm.acompletion( + model="gpt-3.5-turbo", + messages=messages, + client=client, + ) + + await GLOBAL_LOGGING_WORKER.flush() + await asyncio.sleep(0.5) + + (event,) = events + # Should still have the transaction, but no child spans since integration is off + assert event["type"] == "transaction" + chat_spans = list( + x + for x in event["spans"] + if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" + ) assert len(chat_spans) == 0 -def test_response_without_usage(sentry_init, capture_items): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_response_without_usage( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """Test handling of responses without usage information.""" sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") messages = [{"role": "user", "content": "Hello!"}] @@ -1444,25 +2268,51 @@ def test_response_without_usage(sentry_init, capture_items): }, )() - with start_transaction(name="litellm test"): - kwargs = { - "model": "gpt-3.5-turbo", - "messages": messages, - } + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - _input_callback(kwargs) - _success_callback( - kwargs, - mock_response, - datetime.now(), - datetime.now(), - ) + with start_transaction(name="litellm test"): + kwargs = { + "model": "gpt-3.5-turbo", + "messages": messages, + } + + 
_input_callback(kwargs) + _success_callback( + kwargs, + mock_response, + datetime.now(), + datetime.now(), + ) + + (span,) = (item.payload for item in items if item.type == "span") + + # Span should still be created even without usage info + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat gpt-3.5-turbo" + else: + events = capture_events() - (span,) = (item.payload for item in items if item.type == "span") + with start_transaction(name="litellm test"): + kwargs = { + "model": "gpt-3.5-turbo", + "messages": messages, + } + + _input_callback(kwargs) + _success_callback( + kwargs, + mock_response, + datetime.now(), + datetime.now(), + ) - # Span should still be created even without usage info - assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - assert span["name"] == "chat gpt-3.5-turbo" + (event,) = events + (span,) = event["spans"] + + # Span should still be created even without usage info + assert span["op"] == OP.GEN_AI_CHAT + assert span["description"] == "chat gpt-3.5-turbo" def test_integration_setup(sentry_init): @@ -1478,14 +2328,20 @@ def test_integration_setup(sentry_init): assert _failure_callback in (litellm.failure_callback or []) -def test_litellm_message_truncation(sentry_init, capture_items): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_litellm_message_truncation( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """Test that large messages are truncated properly in LiteLLM integration.""" sentry_init( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") large_content = ( "This is a very long message that will exceed our size limits. 
" * 1000 @@ -1499,38 +2355,76 @@ def test_litellm_message_truncation(sentry_init, capture_items): ] mock_response = MockCompletionResponse() - with start_transaction(name="litellm test"): - kwargs = { - "model": "gpt-3.5-turbo", - "messages": messages, - } + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - _input_callback(kwargs) - _success_callback( - kwargs, - mock_response, - datetime.now(), - datetime.now(), - ) + with start_transaction(name="litellm test"): + kwargs = { + "model": "gpt-3.5-turbo", + "messages": messages, + } + + _input_callback(kwargs) + _success_callback( + kwargs, + mock_response, + datetime.now(), + datetime.now(), + ) - spans = [item.payload for item in items if item.type == "span"] - chat_spans = [ - span for span in spans if span["attributes"].get("sentry.op") == OP.GEN_AI_CHAT - ] - assert len(chat_spans) > 0 + spans = [item.payload for item in items if item.type == "span"] + chat_spans = [ + span + for span in spans + if span["attributes"].get("sentry.op") == OP.GEN_AI_CHAT + ] + + assert len(chat_spans) > 0 + + chat_span = chat_spans[0] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["attributes"] + + messages_data = chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + else: + events = capture_events() + + with start_transaction(name="litellm test"): + kwargs = { + "model": "gpt-3.5-turbo", + "messages": messages, + } + + _input_callback(kwargs) + _success_callback( + kwargs, + mock_response, + datetime.now(), + datetime.now(), + ) + + assert len(events) > 0 + tx = events[0] + assert tx["type"] == "transaction" + + chat_spans = [ + span for span in tx.get("spans", []) if span.get("op") == OP.GEN_AI_CHAT + ] + assert len(chat_spans) > 0 - chat_span = chat_spans[0] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["attributes"] + chat_span = chat_spans[0] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["data"] - messages_data = chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + 
messages_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] assert isinstance(messages_data, str) parsed_messages = json.loads(messages_data) assert isinstance(parsed_messages, list) assert len(parsed_messages) == 1 assert "small message 5" in str(parsed_messages[0]) - - tx = next(item.payload for item in items if item.type == "transaction") + if stream_gen_ai_spans: + tx = next(item.payload for item in items if item.type == "transaction") + else: + pass assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5 @@ -1539,19 +2433,22 @@ def test_litellm_message_truncation(sentry_init, capture_items): IMAGE_DATA_URI = f"data:image/png;base64,{IMAGE_B64}" +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_binary_content_encoding_image_url( reset_litellm_executor, sentry_init, + capture_events, capture_items, get_model_response, nonstreaming_chat_completions_model_response, + stream_gen_ai_spans, ): sentry_init( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") messages = [ { @@ -1583,12 +2480,14 @@ def test_binary_content_encoding_image_url( request_headers={"X-Stainless-Raw-Response": "true"}, ) - with mock.patch.object( - client.completions._client._client, - "send", - return_value=model_response, - ): - with start_transaction(name="litellm test"): + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with mock.patch.object( + client.completions._client._client, + "send", + return_value=model_response, + ), start_transaction(name="litellm test"): litellm.completion( model="gpt-4-vision-preview", messages=messages, @@ -1598,16 +2497,42 @@ def test_binary_content_encoding_image_url( litellm_utils.executor.shutdown(wait=True) - spans = [item.payload for item in items if item.type == "span"] - chat_spans = list( - x - for x in spans 
- if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - and x["attributes"]["sentry.origin"] == "auto.ai.litellm" - ) - assert len(chat_spans) == 1 - span = chat_spans[0] - messages_data = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + spans = [item.payload for item in items if item.type == "span"] + chat_spans = list( + x + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" + ) + assert len(chat_spans) == 1 + span = chat_spans[0] + messages_data = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + else: + events = capture_events() + + with mock.patch.object( + client.completions._client._client, + "send", + return_value=model_response, + ), start_transaction(name="litellm test"): + litellm.completion( + model="gpt-4-vision-preview", + messages=messages, + client=client, + custom_llm_provider="openai", + ) + + litellm_utils.executor.shutdown(wait=True) + + (event,) = events + chat_spans = list( + x + for x in event["spans"] + if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" + ) + assert len(chat_spans) == 1 + span = chat_spans[0] + messages_data = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) blob_item = next( ( @@ -1628,19 +2553,22 @@ def test_binary_content_encoding_image_url( ) +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio(loop_scope="session") async def test_async_binary_content_encoding_image_url( sentry_init, + capture_events, capture_items, get_model_response, nonstreaming_chat_completions_model_response, + stream_gen_ai_spans, ): sentry_init( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") messages = [ { @@ -1672,12 +2600,14 @@ async def test_async_binary_content_encoding_image_url( 
request_headers={"X-Stainless-Raw-Response": "true"}, ) - with mock.patch.object( - client.completions._client._client, - "send", - return_value=model_response, - ): - with start_transaction(name="litellm test"): + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with mock.patch.object( + client.completions._client._client, + "send", + return_value=model_response, + ), start_transaction(name="litellm test"): await litellm.acompletion( model="gpt-4-vision-preview", messages=messages, @@ -1688,16 +2618,43 @@ async def test_async_binary_content_encoding_image_url( await GLOBAL_LOGGING_WORKER.flush() await asyncio.sleep(0.5) - spans = [item.payload for item in items if item.type == "span"] - chat_spans = list( - x - for x in spans - if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - and x["attributes"]["sentry.origin"] == "auto.ai.litellm" - ) - assert len(chat_spans) == 1 - span = chat_spans[0] - messages_data = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + spans = [item.payload for item in items if item.type == "span"] + chat_spans = list( + x + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" + ) + assert len(chat_spans) == 1 + span = chat_spans[0] + messages_data = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + else: + events = capture_events() + + with mock.patch.object( + client.completions._client._client, + "send", + return_value=model_response, + ), start_transaction(name="litellm test"): + await litellm.acompletion( + model="gpt-4-vision-preview", + messages=messages, + client=client, + custom_llm_provider="openai", + ) + + await GLOBAL_LOGGING_WORKER.flush() + await asyncio.sleep(0.5) + + (event,) = events + chat_spans = list( + x + for x in event["spans"] + if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" + ) + assert len(chat_spans) == 1 + span = chat_spans[0] + messages_data = 
json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) blob_item = next( ( @@ -1718,19 +2675,22 @@ async def test_async_binary_content_encoding_image_url( ) +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_binary_content_encoding_mixed_content( reset_litellm_executor, sentry_init, + capture_events, capture_items, get_model_response, nonstreaming_chat_completions_model_response, + stream_gen_ai_spans, ): sentry_init( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") messages = [ { @@ -1763,12 +2723,14 @@ def test_binary_content_encoding_mixed_content( request_headers={"X-Stainless-Raw-Response": "true"}, ) - with mock.patch.object( - client.completions._client._client, - "send", - return_value=model_response, - ): - with start_transaction(name="litellm test"): + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with mock.patch.object( + client.completions._client._client, + "send", + return_value=model_response, + ), start_transaction(name="litellm test"): litellm.completion( model="gpt-4-vision-preview", messages=messages, @@ -1778,16 +2740,42 @@ def test_binary_content_encoding_mixed_content( litellm_utils.executor.shutdown(wait=True) - spans = [item.payload for item in items if item.type == "span"] - chat_spans = list( - x - for x in spans - if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - and x["attributes"]["sentry.origin"] == "auto.ai.litellm" - ) - assert len(chat_spans) == 1 - span = chat_spans[0] - messages_data = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + spans = [item.payload for item in items if item.type == "span"] + chat_spans = list( + x + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" + ) + assert len(chat_spans) == 1 + span = 
chat_spans[0] + messages_data = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + else: + events = capture_events() + + with mock.patch.object( + client.completions._client._client, + "send", + return_value=model_response, + ), start_transaction(name="litellm test"): + litellm.completion( + model="gpt-4-vision-preview", + messages=messages, + client=client, + custom_llm_provider="openai", + ) + + litellm_utils.executor.shutdown(wait=True) + + (event,) = events + chat_spans = list( + x + for x in event["spans"] + if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" + ) + assert len(chat_spans) == 1 + span = chat_spans[0] + messages_data = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) content_items = [ item for msg in messages_data if "content" in msg for item in msg["content"] @@ -1796,19 +2784,22 @@ def test_binary_content_encoding_mixed_content( assert any(item.get("type") == "blob" for item in content_items) +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio(loop_scope="session") async def test_async_binary_content_encoding_mixed_content( sentry_init, + capture_events, capture_items, get_model_response, nonstreaming_chat_completions_model_response, + stream_gen_ai_spans, ): sentry_init( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") messages = [ { @@ -1841,12 +2832,14 @@ async def test_async_binary_content_encoding_mixed_content( request_headers={"X-Stainless-Raw-Response": "true"}, ) - with mock.patch.object( - client.completions._client._client, - "send", - return_value=model_response, - ): - with start_transaction(name="litellm test"): + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with mock.patch.object( + client.completions._client._client, + "send", + return_value=model_response, + ), 
start_transaction(name="litellm test"): await litellm.acompletion( model="gpt-4-vision-preview", messages=messages, @@ -1857,16 +2850,44 @@ async def test_async_binary_content_encoding_mixed_content( await GLOBAL_LOGGING_WORKER.flush() await asyncio.sleep(0.5) - spans = [item.payload for item in items if item.type == "span"] - chat_spans = list( - x - for x in spans - if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - and x["attributes"]["sentry.origin"] == "auto.ai.litellm" - ) + spans = [item.payload for item in items if item.type == "span"] + chat_spans = list( + x + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" + ) + else: + events = capture_events() + + with mock.patch.object( + client.completions._client._client, + "send", + return_value=model_response, + ), start_transaction(name="litellm test"): + await litellm.acompletion( + model="gpt-4-vision-preview", + messages=messages, + client=client, + custom_llm_provider="openai", + ) + + await GLOBAL_LOGGING_WORKER.flush() + await asyncio.sleep(0.5) + + (event,) = events + chat_spans = list( + x + for x in event["spans"] + if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" + ) + assert len(chat_spans) == 1 span = chat_spans[0] - messages_data = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + if stream_gen_ai_spans: + messages_data = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + else: + messages_data = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) content_items = [ item for msg in messages_data if "content" in msg for item in msg["content"] @@ -1875,19 +2896,22 @@ async def test_async_binary_content_encoding_mixed_content( assert any(item.get("type") == "blob" for item in content_items) +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_binary_content_encoding_uri_type( reset_litellm_executor, sentry_init, + capture_events, capture_items, 
get_model_response, nonstreaming_chat_completions_model_response, + stream_gen_ai_spans, ): sentry_init( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") messages = [ { @@ -1918,12 +2942,13 @@ def test_binary_content_encoding_uri_type( request_headers={"X-Stainless-Raw-Response": "true"}, ) - with mock.patch.object( - client.completions._client._client, - "send", - return_value=model_response, - ): - with start_transaction(name="litellm test"): + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + with mock.patch.object( + client.completions._client._client, + "send", + return_value=model_response, + ), start_transaction(name="litellm test"): litellm.completion( model="gpt-4-vision-preview", messages=messages, @@ -1933,16 +2958,46 @@ def test_binary_content_encoding_uri_type( litellm_utils.executor.shutdown(wait=True) - spans = [item.payload for item in items if item.type == "span"] - chat_spans = list( - x - for x in spans - if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - and x["attributes"]["sentry.origin"] == "auto.ai.litellm" - ) - assert len(chat_spans) == 1 - span = chat_spans[0] - messages_data = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + spans = [item.payload for item in items if item.type == "span"] + chat_spans = list( + x + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" + ) + + assert len(chat_spans) == 1 + span = chat_spans[0] + + messages_data = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + else: + events = capture_events() + + with mock.patch.object( + client.completions._client._client, + "send", + return_value=model_response, + ), start_transaction(name="litellm test"): + litellm.completion( + model="gpt-4-vision-preview", + 
messages=messages, + client=client, + custom_llm_provider="openai", + ) + + litellm_utils.executor.shutdown(wait=True) + + (event,) = events + chat_spans = list( + x + for x in event["spans"] + if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" + ) + + assert len(chat_spans) == 1 + span = chat_spans[0] + + messages_data = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) uri_item = next( ( @@ -1958,19 +3013,22 @@ def test_binary_content_encoding_uri_type( assert uri_item["uri"] == "https://example.com/image.jpg" +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio(loop_scope="session") async def test_async_binary_content_encoding_uri_type( sentry_init, + capture_events, capture_items, get_model_response, nonstreaming_chat_completions_model_response, + stream_gen_ai_spans, ): sentry_init( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") messages = [ { @@ -2001,12 +3059,14 @@ async def test_async_binary_content_encoding_uri_type( request_headers={"X-Stainless-Raw-Response": "true"}, ) - with mock.patch.object( - client.completions._client._client, - "send", - return_value=model_response, - ): - with start_transaction(name="litellm test"): + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with mock.patch.object( + client.completions._client._client, + "send", + return_value=model_response, + ), start_transaction(name="litellm test"): await litellm.acompletion( model="gpt-4-vision-preview", messages=messages, @@ -2017,16 +3077,47 @@ async def test_async_binary_content_encoding_uri_type( await GLOBAL_LOGGING_WORKER.flush() await asyncio.sleep(0.5) - spans = [item.payload for item in items if item.type == "span"] - chat_spans = list( - x - for x in spans - if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - and 
x["attributes"]["sentry.origin"] == "auto.ai.litellm" - ) - assert len(chat_spans) == 1 - span = chat_spans[0] - messages_data = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + spans = [item.payload for item in items if item.type == "span"] + chat_spans = list( + x + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" + ) + + assert len(chat_spans) == 1 + span = chat_spans[0] + + messages_data = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + else: + events = capture_events() + + with mock.patch.object( + client.completions._client._client, + "send", + return_value=model_response, + ), start_transaction(name="litellm test"): + await litellm.acompletion( + model="gpt-4-vision-preview", + messages=messages, + client=client, + custom_llm_provider="openai", + ) + + await GLOBAL_LOGGING_WORKER.flush() + await asyncio.sleep(0.5) + + (event,) = events + chat_spans = list( + x + for x in event["spans"] + if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" + ) + + assert len(chat_spans) == 1 + span = chat_spans[0] + + messages_data = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) uri_item = next( ( diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index d5e78bad99..c80b2df513 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -103,6 +103,7 @@ async def __call__(self, *args, **kwargs): ) +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( "send_default_pii, include_prompts", [ @@ -113,17 +114,19 @@ async def __call__(self, *args, **kwargs): ) def test_nonstreaming_chat_completion_no_prompts( sentry_init, + capture_events, capture_items, send_default_pii, include_prompts, nonstreaming_chat_completions_model_response, + stream_gen_ai_spans, ): sentry_init( 
integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") client = OpenAI(api_key="z") client.chat.completions._post = mock.Mock( @@ -140,52 +143,100 @@ def test_nonstreaming_chat_completion_no_prompts( ) ) - with start_transaction(name="openai tx"): - response = ( - client.chat.completions.create( - model="some-model", - messages=[ - {"role": "system", "content": "You are a helpful assistant."}, - {"role": "user", "content": "hello"}, - ], - max_tokens=100, - presence_penalty=0.1, - frequency_penalty=0.2, - temperature=0.7, - top_p=0.9, + if stream_gen_ai_spans: + items = capture_items("span") + + with start_transaction(name="openai tx"): + response = ( + client.chat.completions.create( + model="some-model", + messages=[ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "hello"}, + ], + max_tokens=100, + presence_penalty=0.1, + frequency_penalty=0.2, + temperature=0.7, + top_p=0.9, + ) + .choices[0] + .message.content + ) + + assert response == "the model response" + + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False + + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["attributes"] + assert 
SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] + + assert span["attributes"]["gen_ai.usage.output_tokens"] == 10 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 + else: + events = capture_events() + + with start_transaction(name="openai tx"): + response = ( + client.chat.completions.create( + model="some-model", + messages=[ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "hello"}, + ], + max_tokens=100, + presence_penalty=0.1, + frequency_penalty=0.2, + temperature=0.7, + top_p=0.9, + ) + .choices[0] + .message.content ) - .choices[0] - .message.content - ) - assert response == "the model response" + assert response == "the model response" - span = next(item.payload for item in items if item.type == "span") - assert span["attributes"]["sentry.op"] == "gen_ai.chat" - assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False + tx = events[0] + assert tx["type"] == "transaction" + span = tx["spans"][0] + assert span["op"] == "gen_ai.chat" + assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" + assert span["data"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["data"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 
+ assert span["data"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 + assert span["data"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert span["data"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["attributes"] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] - assert span["attributes"]["gen_ai.usage.output_tokens"] == 10 - assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 - assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 + assert span["data"]["gen_ai.usage.output_tokens"] == 10 + assert span["data"]["gen_ai.usage.input_tokens"] == 20 + assert span["data"]["gen_ai.usage.total_tokens"] == 30 +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( - "messages", + "get_messages", [ pytest.param( - [ + lambda: [ { "role": "system", "content": "You are a helpful assistant.", @@ -195,7 +246,7 @@ def test_nonstreaming_chat_completion_no_prompts( id="blocks", ), pytest.param( - [ + lambda: [ { "role": "system", "content": [ @@ -208,7 +259,7 @@ def test_nonstreaming_chat_completion_no_prompts( id="parts", ), pytest.param( - iter( + lambda: iter( [ { "role": "system", @@ -226,17 +277,19 @@ def test_nonstreaming_chat_completion_no_prompts( ) def test_nonstreaming_chat_completion( sentry_init, + capture_events, capture_items, - messages, + get_messages, request, nonstreaming_chat_completions_model_response, + stream_gen_ai_spans, ): sentry_init( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") client = OpenAI(api_key="z") client.chat.completions._post 
= mock.Mock( @@ -253,63 +306,131 @@ def test_nonstreaming_chat_completion( ) ) - with start_transaction(name="openai tx"): - response = ( - client.chat.completions.create( - model="some-model", - messages=messages, - max_tokens=100, - presence_penalty=0.1, - frequency_penalty=0.2, - temperature=0.7, - top_p=0.9, + if stream_gen_ai_spans: + items = capture_items("span") + + with start_transaction(name="openai tx"): + response = ( + client.chat.completions.create( + model="some-model", + messages=get_messages(), + max_tokens=100, + presence_penalty=0.1, + frequency_penalty=0.2, + temperature=0.7, + top_p=0.9, + ) + .choices[0] + .message.content ) - .choices[0] - .message.content - ) - assert response == "the model response" + assert response == "the model response" - span = next(item.payload for item in items if item.type == "span") - assert span["attributes"]["sentry.op"] == "gen_ai.chat" - assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 + assert 
span["attributes"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 - param_id = request.node.callspec.id - if "blocks" in param_id: - assert json.loads(span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ - { - "type": "text", - "content": "You are a helpful assistant.", - } - ] + param_id = request.node.callspec.id + if "blocks" in param_id: + assert json.loads( + span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + ) == [ + { + "type": "text", + "content": "You are a helpful assistant.", + } + ] + else: + assert json.loads( + span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + ) == [ + { + "type": "text", + "content": "You are a helpful assistant.", + }, + { + "type": "text", + "content": "Be concise and clear.", + }, + ] + + assert "hello" in span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert "the model response" in span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] + + assert span["attributes"]["gen_ai.usage.output_tokens"] == 10 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 else: - assert json.loads(span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ - { - "type": "text", - "content": "You are a helpful assistant.", - }, - { - "type": "text", - "content": "Be concise and clear.", - }, - ] + events = capture_events() + + with start_transaction(name="openai tx"): + response = ( + client.chat.completions.create( + model="some-model", + messages=get_messages(), + max_tokens=100, + presence_penalty=0.1, + frequency_penalty=0.2, + temperature=0.7, + top_p=0.9, + ) + .choices[0] + .message.content + ) + + assert response == "the model response" + + tx = events[0] + assert tx["type"] == "transaction" + span = tx["spans"][0] + assert span["op"] == "gen_ai.chat" + assert span["data"][SPANDATA.GEN_AI_SYSTEM] == 
"openai" + assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False + + assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" + assert span["data"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["data"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 + assert span["data"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 + assert span["data"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert span["data"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + + param_id = request.node.callspec.id + if "blocks" in param_id: + assert json.loads(span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ + { + "type": "text", + "content": "You are a helpful assistant.", + } + ] + else: + assert json.loads(span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ + { + "type": "text", + "content": "You are a helpful assistant.", + }, + { + "type": "text", + "content": "Be concise and clear.", + }, + ] - assert "hello" in span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert "the model response" in span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] + assert "hello" in span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert "the model response" in span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] - assert span["attributes"]["gen_ai.usage.output_tokens"] == 10 - assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 - assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 + assert span["data"]["gen_ai.usage.output_tokens"] == 10 + assert span["data"]["gen_ai.usage.input_tokens"] == 20 + assert span["data"]["gen_ai.usage.total_tokens"] == 30 +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio @pytest.mark.parametrize( "send_default_pii, include_prompts", @@ -321,17 +442,19 @@ def test_nonstreaming_chat_completion( ) async def test_nonstreaming_chat_completion_async_no_prompts( sentry_init, + capture_events, capture_items, send_default_pii, include_prompts, nonstreaming_chat_completions_model_response, + 
stream_gen_ai_spans, ): sentry_init( integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") client = AsyncOpenAI(api_key="z") client.chat.completions._post = mock.AsyncMock( @@ -348,50 +471,95 @@ async def test_nonstreaming_chat_completion_async_no_prompts( ) ) - with start_transaction(name="openai tx"): - response = await client.chat.completions.create( - model="some-model", - messages=[ - {"role": "system", "content": "You are a helpful assistant."}, - {"role": "user", "content": "hello"}, - ], - max_tokens=100, - presence_penalty=0.1, - frequency_penalty=0.2, - temperature=0.7, - top_p=0.9, - ) - response = response.choices[0].message.content + if stream_gen_ai_spans: + items = capture_items("span") + + with start_transaction(name="openai tx"): + response = await client.chat.completions.create( + model="some-model", + messages=[ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "hello"}, + ], + max_tokens=100, + presence_penalty=0.1, + frequency_penalty=0.2, + temperature=0.7, + top_p=0.9, + ) + response = response.choices[0].message.content + + assert response == "the model response" + + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False + + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert 
span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["attributes"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] + + assert span["attributes"]["gen_ai.usage.output_tokens"] == 10 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 + else: + events = capture_events() + + with start_transaction(name="openai tx"): + response = await client.chat.completions.create( + model="some-model", + messages=[ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "hello"}, + ], + max_tokens=100, + presence_penalty=0.1, + frequency_penalty=0.2, + temperature=0.7, + top_p=0.9, + ) + response = response.choices[0].message.content - assert response == "the model response" + assert response == "the model response" - span = next(item.payload for item in items if item.type == "span") - assert span["attributes"]["sentry.op"] == "gen_ai.chat" - assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False + tx = events[0] + assert tx["type"] == "transaction" + span = tx["spans"][0] + assert span["op"] == "gen_ai.chat" + assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" + assert 
span["data"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["data"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 + assert span["data"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 + assert span["data"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert span["data"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["attributes"] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] - assert span["attributes"]["gen_ai.usage.output_tokens"] == 10 - assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 - assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 + assert span["data"]["gen_ai.usage.output_tokens"] == 10 + assert span["data"]["gen_ai.usage.input_tokens"] == 20 + assert span["data"]["gen_ai.usage.total_tokens"] == 30 +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio @pytest.mark.parametrize( - "messages", + "get_messages", [ pytest.param( - [ + lambda: [ { "role": "system", "content": "You are a helpful assistant.", @@ -401,7 +569,7 @@ async def test_nonstreaming_chat_completion_async_no_prompts( id="blocks", ), pytest.param( - [ + lambda: [ { "role": "system", "content": [ @@ -414,7 +582,7 @@ async def test_nonstreaming_chat_completion_async_no_prompts( id="parts", ), pytest.param( - iter( + lambda: iter( [ { "role": "system", @@ -432,17 +600,19 @@ async def test_nonstreaming_chat_completion_async_no_prompts( ) async def test_nonstreaming_chat_completion_async( sentry_init, + capture_events, capture_items, - messages, + get_messages, request, nonstreaming_chat_completions_model_response, + stream_gen_ai_spans, ): sentry_init( integrations=[OpenAIIntegration(include_prompts=True)], 
traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") client = AsyncOpenAI(api_key="z") client.chat.completions._post = AsyncMock( @@ -459,58 +629,122 @@ async def test_nonstreaming_chat_completion_async( ) ) - with start_transaction(name="openai tx"): - response = await client.chat.completions.create( - model="some-model", - messages=messages, - max_tokens=100, - presence_penalty=0.1, - frequency_penalty=0.2, - temperature=0.7, - top_p=0.9, - ) - response = response.choices[0].message.content + if stream_gen_ai_spans: + items = capture_items("span") - assert response == "the model response" + with start_transaction(name="openai tx"): + response = await client.chat.completions.create( + model="some-model", + messages=get_messages(), + max_tokens=100, + presence_penalty=0.1, + frequency_penalty=0.2, + temperature=0.7, + top_p=0.9, + ) + response = response.choices[0].message.content - span = next(item.payload for item in items if item.type == "span") - assert span["attributes"]["sentry.op"] == "gen_ai.chat" - assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False + assert response == "the model response" - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False - 
param_id = request.node.callspec.id - if "blocks" in param_id: - assert json.loads(span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ - { - "type": "text", - "content": "You are a helpful assistant.", - } - ] + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + + param_id = request.node.callspec.id + if "blocks" in param_id: + assert json.loads( + span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + ) == [ + { + "type": "text", + "content": "You are a helpful assistant.", + } + ] + else: + assert json.loads( + span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + ) == [ + { + "type": "text", + "content": "You are a helpful assistant.", + }, + { + "type": "text", + "content": "Be concise and clear.", + }, + ] + + assert "hello" in span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert "the model response" in span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] + + assert span["attributes"]["gen_ai.usage.output_tokens"] == 10 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 else: - assert json.loads(span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ - { - "type": "text", - "content": "You are a helpful assistant.", - }, - { - "type": "text", - "content": "Be concise and clear.", - }, - ] + events = capture_events() + + with start_transaction(name="openai tx"): + response = await client.chat.completions.create( + model="some-model", + messages=get_messages(), + max_tokens=100, + presence_penalty=0.1, + frequency_penalty=0.2, + temperature=0.7, + top_p=0.9, + ) + response = 
response.choices[0].message.content + + assert response == "the model response" + + tx = events[0] + assert tx["type"] == "transaction" + span = tx["spans"][0] + assert span["op"] == "gen_ai.chat" + assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False + + assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" + assert span["data"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["data"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 + assert span["data"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 + assert span["data"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert span["data"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + + param_id = request.node.callspec.id + if "blocks" in param_id: + assert json.loads(span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ + { + "type": "text", + "content": "You are a helpful assistant.", + } + ] + else: + assert json.loads(span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ + { + "type": "text", + "content": "You are a helpful assistant.", + }, + { + "type": "text", + "content": "Be concise and clear.", + }, + ] - assert "hello" in span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert "the model response" in span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] + assert "hello" in span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert "the model response" in span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] - assert span["attributes"]["gen_ai.usage.output_tokens"] == 10 - assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 - assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 + assert span["data"]["gen_ai.usage.output_tokens"] == 10 + assert span["data"]["gen_ai.usage.input_tokens"] == 20 + assert span["data"]["gen_ai.usage.total_tokens"] == 30 def tiktoken_encoding_if_installed(): @@ -523,6 +757,7 @@ def tiktoken_encoding_if_installed(): # noinspection PyTypeChecker 
+@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( "send_default_pii, include_prompts", [ @@ -533,11 +768,13 @@ def tiktoken_encoding_if_installed(): ) def test_streaming_chat_completion_no_prompts( sentry_init, + capture_events, capture_items, send_default_pii, include_prompts, get_model_response, server_side_event_chunks, + stream_gen_ai_spans, ): sentry_init( integrations=[ @@ -548,8 +785,8 @@ def test_streaming_chat_completion_no_prompts( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") client = OpenAI(api_key="z") returned_stream = get_model_response( @@ -599,12 +836,14 @@ def test_streaming_chat_completion_no_prompts( ) ) - with mock.patch.object( - client.chat._client._client, - "send", - return_value=returned_stream, - ): - with start_transaction(name="openai tx"): + if stream_gen_ai_spans: + items = capture_items("span") + + with mock.patch.object( + client.chat._client._client, + "send", + return_value=returned_stream, + ), start_transaction(name="openai tx"): response_stream = client.chat.completions.create( model="some-model", messages=[ @@ -622,53 +861,111 @@ def test_streaming_chat_completion_no_prompts( map(lambda x: x.choices[0].delta.content, response_stream) ) - assert response_string == "hello world" + assert response_string == "hello world" - span = next(item.payload for item in items if item.type == "span") - assert span["attributes"]["sentry.op"] == "gen_ai.chat" - assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - assert 
span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "model-id" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "model-id" - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["attributes"] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["attributes"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - try: - import tiktoken # type: ignore # noqa # pylint: disable=unused-import + try: + import tiktoken # type: ignore # noqa # pylint: disable=unused-import - assert span["attributes"]["gen_ai.usage.output_tokens"] == 2 - assert span["attributes"]["gen_ai.usage.input_tokens"] == 7 - assert span["attributes"]["gen_ai.usage.total_tokens"] == 9 - except ImportError: - pass # if tiktoken is not installed, we can't guarantee token usage will be calculated properly + assert span["attributes"]["gen_ai.usage.output_tokens"] == 2 + assert 
span["attributes"]["gen_ai.usage.input_tokens"] == 7 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 9 + except ImportError: + pass # if tiktoken is not installed, we can't guarantee token usage will be calculated properly + else: + events = capture_events() + + with mock.patch.object( + client.chat._client._client, + "send", + return_value=returned_stream, + ), start_transaction(name="openai tx"): + response_stream = client.chat.completions.create( + model="some-model", + messages=[ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "hello"}, + ], + stream=True, + max_tokens=100, + presence_penalty=0.1, + frequency_penalty=0.2, + temperature=0.7, + top_p=0.9, + ) + response_string = "".join( + map(lambda x: x.choices[0].delta.content, response_stream) + ) + + assert response_string == "hello world" + + tx = events[0] + assert tx["type"] == "transaction" + span = tx["spans"][0] + assert span["op"] == "gen_ai.chat" + assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + + assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" + assert span["data"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["data"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 + assert span["data"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 + assert span["data"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert span["data"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + + assert span["data"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "model-id" + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + try: + import tiktoken # type: ignore # noqa # pylint: disable=unused-import + + assert span["data"]["gen_ai.usage.output_tokens"] == 2 + assert span["data"]["gen_ai.usage.input_tokens"] == 7 + assert 
span["data"]["gen_ai.usage.total_tokens"] == 9 + except ImportError: + pass # if tiktoken is not installed, we can't guarantee token usage will be calculated properly + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.skipif( OPENAI_VERSION <= (1, 1, 0), reason="OpenAI versions <=1.1.0 do not support the stream_options parameter.", ) def test_streaming_chat_completion_with_usage_in_stream( sentry_init, + capture_events, capture_items, get_model_response, server_side_event_chunks, + stream_gen_ai_spans, ): """When stream_options=include_usage is set, token usage comes from the final chunk's usage field.""" sentry_init( integrations=[OpenAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=False, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") client = OpenAI(api_key="z") returned_stream = get_model_response( @@ -710,12 +1007,14 @@ def test_streaming_chat_completion_with_usage_in_stream( ) ) - with mock.patch.object( - client.chat._client._client, - "send", - return_value=returned_stream, - ): - with start_transaction(name="openai tx"): + if stream_gen_ai_spans: + items = capture_items("span") + + with mock.patch.object( + client.chat._client._client, + "send", + return_value=returned_stream, + ), start_transaction(name="openai tx"): response_stream = client.chat.completions.create( model="some-model", messages=[{"role": "user", "content": "hello"}], @@ -725,30 +1024,57 @@ def test_streaming_chat_completion_with_usage_in_stream( for _ in response_stream: pass - span = next(item.payload for item in items if item.type == "span") - assert span["attributes"]["sentry.op"] == "gen_ai.chat" - assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 - assert span["attributes"]["gen_ai.usage.output_tokens"] == 10 - assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 + span = next(item.payload for item in items if item.type == "span") + assert 
span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 + assert span["attributes"]["gen_ai.usage.output_tokens"] == 10 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 + else: + events = capture_events() + with mock.patch.object( + client.chat._client._client, + "send", + return_value=returned_stream, + ), start_transaction(name="openai tx"): + response_stream = client.chat.completions.create( + model="some-model", + messages=[{"role": "user", "content": "hello"}], + stream=True, + stream_options={"include_usage": True}, + ) + for _ in response_stream: + pass + + tx = events[0] + assert tx["type"] == "transaction" + span = tx["spans"][0] + assert span["op"] == "gen_ai.chat" + assert span["data"]["gen_ai.usage.input_tokens"] == 20 + assert span["data"]["gen_ai.usage.output_tokens"] == 10 + assert span["data"]["gen_ai.usage.total_tokens"] == 30 + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.skipif( OPENAI_VERSION <= (1, 1, 0), reason="OpenAI versions <=1.1.0 do not support the stream_options parameter.", ) def test_streaming_chat_completion_empty_content_preserves_token_usage( sentry_init, + capture_events, capture_items, get_model_response, server_side_event_chunks, + stream_gen_ai_spans, ): """Token usage from the stream is recorded even when no content is produced (e.g. 
content filter).""" sentry_init( integrations=[OpenAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=False, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") client = OpenAI(api_key="z") returned_stream = get_model_response( @@ -771,12 +1097,14 @@ def test_streaming_chat_completion_empty_content_preserves_token_usage( ) ) - with mock.patch.object( - client.chat._client._client, - "send", - return_value=returned_stream, - ): - with start_transaction(name="openai tx"): + if stream_gen_ai_spans: + items = capture_items("span") + + with mock.patch.object( + client.chat._client._client, + "send", + return_value=returned_stream, + ), start_transaction(name="openai tx"): response_stream = client.chat.completions.create( model="some-model", messages=[{"role": "user", "content": "hello"}], @@ -786,13 +1114,38 @@ def test_streaming_chat_completion_empty_content_preserves_token_usage( for _ in response_stream: pass - span = next(item.payload for item in items if item.type == "span") - assert span["attributes"]["sentry.op"] == "gen_ai.chat" - assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 - assert "gen_ai.usage.output_tokens" not in span["attributes"] - assert span["attributes"]["gen_ai.usage.total_tokens"] == 20 + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 + assert "gen_ai.usage.output_tokens" not in span["attributes"] + assert span["attributes"]["gen_ai.usage.total_tokens"] == 20 + else: + events = capture_events() + + with mock.patch.object( + client.chat._client._client, + "send", + return_value=returned_stream, + ), start_transaction(name="openai tx"): + response_stream = client.chat.completions.create( + model="some-model", + messages=[{"role": "user", "content": "hello"}], + stream=True, + stream_options={"include_usage": True}, + ) + for _ 
in response_stream: + pass + + tx = events[0] + assert tx["type"] == "transaction" + span = tx["spans"][0] + assert span["op"] == "gen_ai.chat" + assert span["data"]["gen_ai.usage.input_tokens"] == 20 + assert "gen_ai.usage.output_tokens" not in span["data"] + assert span["data"]["gen_ai.usage.total_tokens"] == 20 +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.skipif( OPENAI_VERSION <= (1, 1, 0), reason="OpenAI versions <=1.1.0 do not support the stream_options parameter.", @@ -800,18 +1153,20 @@ def test_streaming_chat_completion_empty_content_preserves_token_usage( @pytest.mark.asyncio async def test_streaming_chat_completion_empty_content_preserves_token_usage_async( sentry_init, + capture_events, capture_items, get_model_response, async_iterator, server_side_event_chunks, + stream_gen_ai_spans, ): """Token usage from the stream is recorded even when no content is produced - async variant.""" sentry_init( integrations=[OpenAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=False, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") client = AsyncOpenAI(api_key="z") returned_stream = get_model_response( @@ -836,12 +1191,14 @@ async def test_streaming_chat_completion_empty_content_preserves_token_usage_asy ) ) - with mock.patch.object( - client.chat._client._client, - "send", - return_value=returned_stream, - ): - with start_transaction(name="openai tx"): + if stream_gen_ai_spans: + items = capture_items("span") + + with mock.patch.object( + client.chat._client._client, + "send", + return_value=returned_stream, + ), start_transaction(name="openai tx"): response_stream = await client.chat.completions.create( model="some-model", messages=[{"role": "user", "content": "hello"}], @@ -851,13 +1208,38 @@ async def test_streaming_chat_completion_empty_content_preserves_token_usage_asy async for _ in response_stream: pass - span = next(item.payload for item in items if 
item.type == "span") - assert span["attributes"]["sentry.op"] == "gen_ai.chat" - assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 - assert "gen_ai.usage.output_tokens" not in span["attributes"] - assert span["attributes"]["gen_ai.usage.total_tokens"] == 20 + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 + assert "gen_ai.usage.output_tokens" not in span["attributes"] + assert span["attributes"]["gen_ai.usage.total_tokens"] == 20 + else: + events = capture_events() + with mock.patch.object( + client.chat._client._client, + "send", + return_value=returned_stream, + ), start_transaction(name="openai tx"): + response_stream = await client.chat.completions.create( + model="some-model", + messages=[{"role": "user", "content": "hello"}], + stream=True, + stream_options={"include_usage": True}, + ) + async for _ in response_stream: + pass + tx = events[0] + assert tx["type"] == "transaction" + span = tx["spans"][0] + assert span["op"] == "gen_ai.chat" + assert span["data"]["gen_ai.usage.input_tokens"] == 20 + assert "gen_ai.usage.output_tokens" not in span["data"] + assert span["data"]["gen_ai.usage.total_tokens"] == 20 + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.skipif( OPENAI_VERSION <= (1, 1, 0), reason="OpenAI versions <=1.1.0 do not support the stream_options parameter.", @@ -865,18 +1247,20 @@ async def test_streaming_chat_completion_empty_content_preserves_token_usage_asy @pytest.mark.asyncio async def test_streaming_chat_completion_async_with_usage_in_stream( sentry_init, + capture_events, capture_items, get_model_response, async_iterator, server_side_event_chunks, + stream_gen_ai_spans, ): """When stream_options=include_usage is set, token usage comes from the final chunk's usage field (async).""" sentry_init( integrations=[OpenAIIntegration(include_prompts=False)], 
traces_sample_rate=1.0, send_default_pii=False, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") client = AsyncOpenAI(api_key="z") returned_stream = get_model_response( @@ -920,12 +1304,36 @@ async def test_streaming_chat_completion_async_with_usage_in_stream( ) ) - with mock.patch.object( - client.chat._client._client, - "send", - return_value=returned_stream, - ): - with start_transaction(name="openai tx"): + if stream_gen_ai_spans: + items = capture_items("span") + + with mock.patch.object( + client.chat._client._client, + "send", + return_value=returned_stream, + ), start_transaction(name="openai tx"): + response_stream = await client.chat.completions.create( + model="some-model", + messages=[{"role": "user", "content": "hello"}], + stream=True, + stream_options={"include_usage": True}, + ) + async for _ in response_stream: + pass + + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 + assert span["attributes"]["gen_ai.usage.output_tokens"] == 10 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 + else: + events = capture_events() + + with mock.patch.object( + client.chat._client._client, + "send", + return_value=returned_stream, + ), start_transaction(name="openai tx"): response_stream = await client.chat.completions.create( model="some-model", messages=[{"role": "user", "content": "hello"}], @@ -935,19 +1343,22 @@ async def test_streaming_chat_completion_async_with_usage_in_stream( async for _ in response_stream: pass - span = next(item.payload for item in items if item.type == "span") - assert span["attributes"]["sentry.op"] == "gen_ai.chat" - assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 - assert span["attributes"]["gen_ai.usage.output_tokens"] == 10 - assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 + tx = events[0] + assert tx["type"] 
== "transaction" + span = tx["spans"][0] + assert span["op"] == "gen_ai.chat" + assert span["data"]["gen_ai.usage.input_tokens"] == 20 + assert span["data"]["gen_ai.usage.output_tokens"] == 10 + assert span["data"]["gen_ai.usage.total_tokens"] == 30 # noinspection PyTypeChecker +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( - "messages", + "get_messages", [ pytest.param( - [ + lambda: [ { "role": "system", "content": "You are a helpful assistant.", @@ -957,7 +1368,7 @@ async def test_streaming_chat_completion_async_with_usage_in_stream( id="blocks", ), pytest.param( - [ + lambda: [ { "role": "system", "content": [ @@ -970,7 +1381,7 @@ async def test_streaming_chat_completion_async_with_usage_in_stream( id="parts", ), pytest.param( - iter( + lambda: iter( [ { "role": "system", @@ -988,11 +1399,13 @@ async def test_streaming_chat_completion_async_with_usage_in_stream( ) def test_streaming_chat_completion( sentry_init, + capture_events, capture_items, - messages, + get_messages, request, get_model_response, server_side_event_chunks, + stream_gen_ai_spans, ): sentry_init( integrations=[ @@ -1003,8 +1416,8 @@ def test_streaming_chat_completion( ], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") client = OpenAI(api_key="z") returned_stream = get_model_response( @@ -1054,15 +1467,17 @@ def test_streaming_chat_completion( ) ) - with mock.patch.object( - client.chat._client._client, - "send", - return_value=returned_stream, - ): - with start_transaction(name="openai tx"): + if stream_gen_ai_spans: + items = capture_items("span") + + with mock.patch.object( + client.chat._client._client, + "send", + return_value=returned_stream, + ), start_transaction(name="openai tx"): response_stream = client.chat.completions.create( model="some-model", - messages=messages, + messages=get_messages(), stream=True, max_tokens=100, presence_penalty=0.1, @@ 
-1073,61 +1488,145 @@ def test_streaming_chat_completion( response_string = "".join( map(lambda x: x.choices[0].delta.content, response_stream) ) - assert response_string == "hello world" - span = next(item.payload for item in items if item.type == "span") - assert span["attributes"]["sentry.op"] == "gen_ai.chat" - assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert response_string == "hello world" - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - param_id = request.node.callspec.id - if "blocks" in param_id: - assert json.loads(span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ - { - "type": "text", - "content": "You are a helpful assistant.", - } - ] + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + + param_id = request.node.callspec.id + if "blocks" in param_id: + assert json.loads( + 
span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + ) == [ + { + "type": "text", + "content": "You are a helpful assistant.", + } + ] + else: + assert json.loads( + span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + ) == [ + { + "type": "text", + "content": "You are a helpful assistant.", + }, + { + "type": "text", + "content": "Be concise and clear.", + }, + ] + + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "model-id" + + assert "hello" in span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert "hello world" in span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] + + try: + import tiktoken # type: ignore # noqa # pylint: disable=unused-import + + if "blocks" in param_id: + assert span["attributes"]["gen_ai.usage.output_tokens"] == 2 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 7 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 9 + else: + assert span["attributes"]["gen_ai.usage.output_tokens"] == 2 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 12 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 14 + + except ImportError: + pass # if tiktoken is not installed, we can't guarantee token usage will be calculated properly else: - assert json.loads(span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ - { - "type": "text", - "content": "You are a helpful assistant.", - }, - { - "type": "text", - "content": "Be concise and clear.", - }, - ] + events = capture_events() - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "model-id" + with mock.patch.object( + client.chat._client._client, + "send", + return_value=returned_stream, + ), start_transaction(name="openai tx"): + response_stream = client.chat.completions.create( + model="some-model", + messages=get_messages(), + stream=True, + max_tokens=100, + presence_penalty=0.1, + frequency_penalty=0.2, + temperature=0.7, + top_p=0.9, + ) + response_string = "".join( + map(lambda x: x.choices[0].delta.content, 
response_stream) + ) - assert "hello" in span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert "hello world" in span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] + assert response_string == "hello world" - try: - import tiktoken # type: ignore # noqa # pylint: disable=unused-import + tx = events[0] + assert tx["type"] == "transaction" + span = tx["spans"][0] + assert span["op"] == "gen_ai.chat" + assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + + assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" + assert span["data"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["data"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 + assert span["data"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 + assert span["data"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert span["data"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + param_id = request.node.callspec.id if "blocks" in param_id: - assert span["attributes"]["gen_ai.usage.output_tokens"] == 2 - assert span["attributes"]["gen_ai.usage.input_tokens"] == 7 - assert span["attributes"]["gen_ai.usage.total_tokens"] == 9 + assert json.loads(span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ + { + "type": "text", + "content": "You are a helpful assistant.", + } + ] else: - assert span["attributes"]["gen_ai.usage.output_tokens"] == 2 - assert span["attributes"]["gen_ai.usage.input_tokens"] == 12 - assert span["attributes"]["gen_ai.usage.total_tokens"] == 14 - except ImportError: - pass # if tiktoken is not installed, we can't guarantee token usage will be calculated properly + assert json.loads(span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ + { + "type": "text", + "content": "You are a helpful assistant.", + }, + { + "type": "text", + "content": "Be concise and clear.", + }, + ] + + assert span["data"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "model-id" + + assert "hello" in 
span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert "hello world" in span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + + try: + import tiktoken # type: ignore # noqa # pylint: disable=unused-import + + if "blocks" in param_id: + assert span["data"]["gen_ai.usage.output_tokens"] == 2 + assert span["data"]["gen_ai.usage.input_tokens"] == 7 + assert span["data"]["gen_ai.usage.total_tokens"] == 9 + else: + assert span["data"]["gen_ai.usage.output_tokens"] == 2 + assert span["data"]["gen_ai.usage.input_tokens"] == 12 + assert span["data"]["gen_ai.usage.total_tokens"] == 14 + + except ImportError: + pass # if tiktoken is not installed, we can't guarantee token usage will be calculated properly # noinspection PyTypeChecker +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio @pytest.mark.parametrize( "send_default_pii, include_prompts", @@ -1139,12 +1638,14 @@ def test_streaming_chat_completion( ) async def test_streaming_chat_completion_async_no_prompts( sentry_init, + capture_events, capture_items, send_default_pii, include_prompts, get_model_response, async_iterator, server_side_event_chunks, + stream_gen_ai_spans, ): sentry_init( integrations=[ @@ -1155,8 +1656,8 @@ async def test_streaming_chat_completion_async_no_prompts( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") client = AsyncOpenAI(api_key="z") returned_stream = get_model_response( @@ -1208,12 +1709,14 @@ async def test_streaming_chat_completion_async_no_prompts( ) ) - with mock.patch.object( - client.chat._client._client, - "send", - return_value=returned_stream, - ): - with start_transaction(name="openai tx"): + if stream_gen_ai_spans: + items = capture_items("span") + + with mock.patch.object( + client.chat._client._client, + "send", + return_value=returned_stream, + ), start_transaction(name="openai tx"): response_stream = await client.chat.completions.create( 
model="some-model", messages=[ @@ -1232,44 +1735,102 @@ async def test_streaming_chat_completion_async_no_prompts( async for x in response_stream: response_string += x.choices[0].delta.content - assert response_string == "hello world" + assert response_string == "hello world" - span = next(item.payload for item in items if item.type == "span") - assert span["attributes"]["sentry.op"] == "gen_ai.chat" - assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "model-id" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "model-id" - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["attributes"] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] - assert 
SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["attributes"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - try: - import tiktoken # type: ignore # noqa # pylint: disable=unused-import + try: + import tiktoken # type: ignore # noqa # pylint: disable=unused-import - assert span["attributes"]["gen_ai.usage.output_tokens"] == 2 - assert span["attributes"]["gen_ai.usage.input_tokens"] == 7 - assert span["attributes"]["gen_ai.usage.total_tokens"] == 9 + assert span["attributes"]["gen_ai.usage.output_tokens"] == 2 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 7 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 9 - except ImportError: - pass # if tiktoken is not installed, we can't guarantee token usage will be calculated properly + except ImportError: + pass # if tiktoken is not installed, we can't guarantee token usage will be calculated properly + else: + events = capture_events() + + with mock.patch.object( + client.chat._client._client, + "send", + return_value=returned_stream, + ), start_transaction(name="openai tx"): + response_stream = await client.chat.completions.create( + model="some-model", + messages=[ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "hello"}, + ], + stream=True, + max_tokens=100, + presence_penalty=0.1, + frequency_penalty=0.2, + temperature=0.7, + top_p=0.9, + ) + + response_string = "" + async for x in response_stream: + response_string += x.choices[0].delta.content + + assert response_string == "hello world" + + tx = events[0] + assert tx["type"] == "transaction" + span = tx["spans"][0] + assert span["op"] == "gen_ai.chat" + assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + + assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" + 
assert span["data"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["data"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 + assert span["data"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 + assert span["data"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert span["data"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + + assert span["data"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "model-id" + + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + + try: + import tiktoken # type: ignore # noqa # pylint: disable=unused-import + + assert span["data"]["gen_ai.usage.output_tokens"] == 2 + assert span["data"]["gen_ai.usage.input_tokens"] == 7 + assert span["data"]["gen_ai.usage.total_tokens"] == 9 + + except ImportError: + pass # if tiktoken is not installed, we can't guarantee token usage will be calculated properly # noinspection PyTypeChecker +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio @pytest.mark.parametrize( - "messages", + "get_messages", [ pytest.param( - [ + lambda: [ { "role": "system", "content": "You are a helpful assistant.", @@ -1279,7 +1840,7 @@ async def test_streaming_chat_completion_async_no_prompts( id="blocks", ), pytest.param( - [ + lambda: [ { "role": "system", "content": [ @@ -1292,7 +1853,7 @@ async def test_streaming_chat_completion_async_no_prompts( id="parts", ), pytest.param( - iter( + lambda: iter( [ { "role": "system", @@ -1310,12 +1871,14 @@ async def test_streaming_chat_completion_async_no_prompts( ) async def test_streaming_chat_completion_async( sentry_init, + capture_events, capture_items, - messages, + get_messages, request, get_model_response, async_iterator, server_side_event_chunks, + stream_gen_ai_spans, ): sentry_init( integrations=[ @@ -1326,8 +1889,8 @@ async def test_streaming_chat_completion_async( ], traces_sample_rate=1.0, send_default_pii=True, 
+ _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") client = AsyncOpenAI(api_key="z") @@ -1380,15 +1943,17 @@ async def test_streaming_chat_completion_async( ) ) - with mock.patch.object( - client.chat._client._client, - "send", - return_value=returned_stream, - ): - with start_transaction(name="openai tx"): + if stream_gen_ai_spans: + items = capture_items("span") + + with mock.patch.object( + client.chat._client._client, + "send", + return_value=returned_stream, + ), start_transaction(name="openai tx"): response_stream = await client.chat.completions.create( model="some-model", - messages=messages, + messages=get_messages(), stream=True, max_tokens=100, presence_penalty=0.1, @@ -1401,127 +1966,299 @@ async def test_streaming_chat_completion_async( async for x in response_stream: response_string += x.choices[0].delta.content - assert response_string == "hello world" + assert response_string == "hello world" - span = next(item.payload for item in items if item.type == "span") - assert span["attributes"]["sentry.op"] == "gen_ai.chat" - assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == 
"some-model" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "model-id" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "model-id" - param_id = request.node.callspec.id - if "blocks" in param_id: - assert json.loads(span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ - { - "type": "text", - "content": "You are a helpful assistant.", - } - ] + param_id = request.node.callspec.id + if "blocks" in param_id: + assert json.loads( + span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + ) == [ + { + "type": "text", + "content": "You are a helpful assistant.", + } + ] + + assert "hello" in span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert "hello world" in span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] + + try: + import tiktoken # type: ignore # noqa # pylint: disable=unused-import + + if "blocks" in param_id: + assert span["attributes"]["gen_ai.usage.output_tokens"] == 2 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 7 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 9 + else: + assert span["attributes"]["gen_ai.usage.output_tokens"] == 2 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 12 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 14 + + except ImportError: + pass # if tiktoken is not installed, we can't guarantee token usage will be calculated properly + else: + assert json.loads( + span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + ) == [ + { + "type": "text", + "content": "You are a helpful assistant.", + }, + { + "type": "text", + "content": "Be concise and clear.", + }, + ] + + assert "hello" 
in span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert "hello world" in span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] + + try: + import tiktoken # type: ignore # noqa # pylint: disable=unused-import + + if "blocks" in param_id: + assert span["attributes"]["gen_ai.usage.output_tokens"] == 2 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 7 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 9 + else: + assert span["attributes"]["gen_ai.usage.output_tokens"] == 2 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 12 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 14 + + except ImportError: + pass # if tiktoken is not installed, we can't guarantee token usage will be calculated properly else: - assert json.loads(span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ - { - "type": "text", - "content": "You are a helpful assistant.", - }, - { - "type": "text", - "content": "Be concise and clear.", - }, - ] + events = capture_events() - assert "hello" in span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert "hello world" in span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] + with mock.patch.object( + client.chat._client._client, + "send", + return_value=returned_stream, + ), start_transaction(name="openai tx"): + response_stream = await client.chat.completions.create( + model="some-model", + messages=get_messages(), + stream=True, + max_tokens=100, + presence_penalty=0.1, + frequency_penalty=0.2, + temperature=0.7, + top_p=0.9, + ) - try: - import tiktoken # type: ignore # noqa # pylint: disable=unused-import + response_string = "" + async for x in response_stream: + response_string += x.choices[0].delta.content + + assert response_string == "hello world" + tx = events[0] + assert tx["type"] == "transaction" + span = tx["spans"][0] + assert span["op"] == "gen_ai.chat" + assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + + assert 
span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" + assert span["data"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["data"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 + assert span["data"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 + assert span["data"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert span["data"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + + assert span["data"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "model-id" + + param_id = request.node.callspec.id if "blocks" in param_id: - assert span["attributes"]["gen_ai.usage.output_tokens"] == 2 - assert span["attributes"]["gen_ai.usage.input_tokens"] == 7 - assert span["attributes"]["gen_ai.usage.total_tokens"] == 9 + assert json.loads(span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ + { + "type": "text", + "content": "You are a helpful assistant.", + } + ] else: - assert span["attributes"]["gen_ai.usage.output_tokens"] == 2 - assert span["attributes"]["gen_ai.usage.input_tokens"] == 12 - assert span["attributes"]["gen_ai.usage.total_tokens"] == 14 + assert json.loads(span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ + { + "type": "text", + "content": "You are a helpful assistant.", + }, + { + "type": "text", + "content": "Be concise and clear.", + }, + ] - except ImportError: - pass # if tiktoken is not installed, we can't guarantee token usage will be calculated properly + assert "hello" in span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert "hello world" in span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + try: + import tiktoken # type: ignore # noqa # pylint: disable=unused-import -def test_bad_chat_completion(sentry_init, capture_items): + if "blocks" in param_id: + assert span["data"]["gen_ai.usage.output_tokens"] == 2 + assert span["data"]["gen_ai.usage.input_tokens"] == 7 + assert span["data"]["gen_ai.usage.total_tokens"] == 9 + else: + assert span["data"]["gen_ai.usage.output_tokens"] == 2 + assert span["data"]["gen_ai.usage.input_tokens"] == 12 + 
assert span["data"]["gen_ai.usage.total_tokens"] == 14 + + except ImportError: + pass # if tiktoken is not installed, we can't guarantee token usage will be calculated properly + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_bad_chat_completion( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("event") - client = OpenAI(api_key="z") - client.chat.completions._post = mock.Mock( - side_effect=OpenAIError("API rate limit reached") - ) - with pytest.raises(OpenAIError): - client.chat.completions.create( - model="some-model", - messages=[{"role": "system", "content": "hello"}], + if stream_gen_ai_spans: + items = capture_items("event") + + client = OpenAI(api_key="z") + client.chat.completions._post = mock.Mock( + side_effect=OpenAIError("API rate limit reached") ) + with pytest.raises(OpenAIError): + client.chat.completions.create( + model="some-model", + messages=[{"role": "system", "content": "hello"}], + ) + + (event,) = (item.payload for item in items if item.type == "event") + else: + events = capture_events() + + client = OpenAI(api_key="z") + client.chat.completions._post = mock.Mock( + side_effect=OpenAIError("API rate limit reached") + ) + with pytest.raises(OpenAIError): + client.chat.completions.create( + model="some-model", + messages=[{"role": "system", "content": "hello"}], + ) + + (event,) = events - (event,) = (item.payload for item in items if item.type == "event") assert event["level"] == "error" -def test_span_status_error(sentry_init, capture_items): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_span_status_error( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": 
stream_gen_ai_spans}, ) - items = capture_items("event", "transaction", "span") - with start_transaction(name="test"): - client = OpenAI(api_key="z") - client.chat.completions._post = mock.Mock( - side_effect=OpenAIError("API rate limit reached") - ) - with pytest.raises(OpenAIError): - client.chat.completions.create( - model="some-model", messages=[{"role": "system", "content": "hello"}] + if stream_gen_ai_spans: + items = capture_items("event", "transaction", "span") + + with start_transaction(name="test"): + client = OpenAI(api_key="z") + client.chat.completions._post = mock.Mock( + side_effect=OpenAIError("API rate limit reached") ) + with pytest.raises(OpenAIError): + client.chat.completions.create( + model="some-model", + messages=[{"role": "system", "content": "hello"}], + ) + + (error,) = (item.payload for item in items if item.type == "event") + assert error["level"] == "error" + + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["status"] == "error" + else: + events = capture_events() - (error,) = (item.payload for item in items if item.type == "event") - assert error["level"] == "error" + with start_transaction(name="test"): + client = OpenAI(api_key="z") + client.chat.completions._post = mock.Mock( + side_effect=OpenAIError("API rate limit reached") + ) + with pytest.raises(OpenAIError): + client.chat.completions.create( + model="some-model", + messages=[{"role": "system", "content": "hello"}], + ) - spans = [item.payload for item in items if item.type == "span"] - assert spans[0]["status"] == "error" + (error, transaction) = events + assert error["level"] == "error" + assert transaction["spans"][0]["status"] == "internal_error" + assert transaction["spans"][0]["tags"]["status"] == "internal_error" +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio -async def test_bad_chat_completion_async(sentry_init, capture_items): +async def test_bad_chat_completion_async( + sentry_init, + 
capture_events, + capture_items, + stream_gen_ai_spans, +): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("event") client = AsyncOpenAI(api_key="z") client.chat.completions._post = AsyncMock( side_effect=OpenAIError("API rate limit reached") ) - with pytest.raises(OpenAIError): - await client.chat.completions.create( - model="some-model", messages=[{"role": "system", "content": "hello"}] - ) + if stream_gen_ai_spans: + items = capture_items("event") + + with pytest.raises(OpenAIError): + await client.chat.completions.create( + model="some-model", messages=[{"role": "system", "content": "hello"}] + ) + + (event,) = (item.payload for item in items if item.type == "event") + else: + events = capture_events() + + with pytest.raises(OpenAIError): + await client.chat.completions.create( + model="some-model", messages=[{"role": "system", "content": "hello"}] + ) + + (event,) = events - (event,) = (item.payload for item in items if item.type == "event") assert event["level"] == "error" +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( "send_default_pii, include_prompts", [ @@ -1531,14 +2268,19 @@ async def test_bad_chat_completion_async(sentry_init, capture_items): ], ) def test_embeddings_create_no_pii( - sentry_init, capture_items, send_default_pii, include_prompts + sentry_init, + capture_events, + capture_items, + send_default_pii, + include_prompts, + stream_gen_ai_spans, ): sentry_init( integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") client = OpenAI(api_key="z") @@ -1552,59 +2294,88 @@ def test_embeddings_create_no_pii( ), ) - client.embeddings._post = mock.Mock(return_value=returned_embedding) - with start_transaction(name="openai tx"): - 
response = client.embeddings.create( - input="hello", model="text-embedding-3-large" + client.embeddings._post = mock.Mock(return_value=returned_embedding) + + if stream_gen_ai_spans: + items = capture_items("span") + + with start_transaction(name="openai tx"): + response = client.embeddings.create( + input="hello", model="text-embedding-3-large" + ) + + assert len(response.data[0].embedding) == 3 + + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.embeddings" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert ( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] + == "text-embedding-3-large" ) - assert len(response.data[0].embedding) == 3 + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in span["attributes"] + + assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 + else: + events = capture_events() + + with start_transaction(name="openai tx"): + response = client.embeddings.create( + input="hello", model="text-embedding-3-large" + ) + + assert len(response.data[0].embedding) == 3 - span = next(item.payload for item in items if item.type == "span") - assert span["attributes"]["sentry.op"] == "gen_ai.embeddings" - assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "text-embedding-3-large" + tx = events[0] + assert tx["type"] == "transaction" + span = tx["spans"][0] + assert span["op"] == "gen_ai.embeddings" + assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "text-embedding-3-large" - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in span["attributes"] + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in span["data"] - assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 - assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 + assert 
span["data"]["gen_ai.usage.input_tokens"] == 20 + assert span["data"]["gen_ai.usage.total_tokens"] == 30 +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( - "input", + "get_input", [ pytest.param( - "hello", + lambda: "hello", id="string", ), pytest.param( - ["First text", "Second text", "Third text"], + lambda: ["First text", "Second text", "Third text"], id="string_sequence", ), pytest.param( - iter(["First text", "Second text", "Third text"]), + lambda: iter(["First text", "Second text", "Third text"]), id="string_iterable", ), pytest.param( - [5, 8, 13, 21, 34], + lambda: [5, 8, 13, 21, 34], id="tokens", ), pytest.param( - iter( + lambda: iter( [5, 8, 13, 21, 34], ), id="token_iterable", ), pytest.param( - [ + lambda: [ [5, 8, 13, 21, 34], [8, 13, 21, 34, 55], ], id="tokens_sequence", ), pytest.param( - iter( + lambda: iter( [ [5, 8, 13, 21, 34], [8, 13, 21, 34, 55], @@ -1614,13 +2385,20 @@ def test_embeddings_create_no_pii( ), ], ) -def test_embeddings_create(sentry_init, capture_items, input, request): +def test_embeddings_create( + sentry_init, + capture_events, + capture_items, + get_input, + request, + stream_gen_ai_spans, +): sentry_init( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") client = OpenAI(api_key="z") @@ -1635,45 +2413,111 @@ def test_embeddings_create(sentry_init, capture_items, input, request): ) client.embeddings._post = mock.Mock(return_value=returned_embedding) - with start_transaction(name="openai tx"): - response = client.embeddings.create(input=input, model="text-embedding-3-large") - assert len(response.data[0].embedding) == 3 + if stream_gen_ai_spans: + items = capture_items("span") - span = next(item.payload for item in items if item.type == "span") - assert span["attributes"]["sentry.op"] == "gen_ai.embeddings" - assert 
span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "text-embedding-3-large" + with start_transaction(name="openai tx"): + response = client.embeddings.create( + input=get_input(), model="text-embedding-3-large" + ) - param_id = request.node.callspec.id - if param_id == "string": - assert json.loads(span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ - "hello" - ] - elif param_id == "string_sequence" or param_id == "string_iterable": - assert json.loads(span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ - "First text", - "Second text", - "Third text", - ] - elif param_id == "tokens" or param_id == "token_iterable": - assert json.loads(span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ - 5, - 8, - 13, - 21, - 34, - ] + assert len(response.data[0].embedding) == 3 + + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.embeddings" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert ( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] + == "text-embedding-3-large" + ) + + param_id = request.node.callspec.id + if ( + "string" in param_id + and "string_sequence" not in param_id + and "string_iterable" not in param_id + ): + assert json.loads(span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ + "hello" + ] + elif "string_sequence" in param_id or "string_iterable" in param_id: + assert json.loads(span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ + "First text", + "Second text", + "Third text", + ] + elif ( + "tokens" in param_id or "token_iterable" in param_id + ) and "tokens_sequence" not in param_id: + assert json.loads(span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ + 5, + 8, + 13, + 21, + 34, + ] + else: + assert json.loads(span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ + [5, 8, 13, 21, 34], + [8, 13, 21, 34, 55], + ] + + assert 
span["attributes"]["gen_ai.usage.input_tokens"] == 20 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 else: - assert json.loads(span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ - [5, 8, 13, 21, 34], - [8, 13, 21, 34, 55], - ] + events = capture_events() + + with start_transaction(name="openai tx"): + response = client.embeddings.create( + input=get_input(), model="text-embedding-3-large" + ) + + assert len(response.data[0].embedding) == 3 + + tx = events[0] + assert tx["type"] == "transaction" + span = tx["spans"][0] + assert span["op"] == "gen_ai.embeddings" + assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "text-embedding-3-large" + + param_id = request.node.callspec.id + if ( + "string" in param_id + and "string_sequence" not in param_id + and "string_iterable" not in param_id + ): + assert json.loads(span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ + "hello" + ] + elif "string_sequence" in param_id or "string_iterable" in param_id: + assert json.loads(span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ + "First text", + "Second text", + "Third text", + ] + elif ( + "tokens" in param_id or "token_iterable" in param_id + ) and "tokens_sequence" not in param_id: + assert json.loads(span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ + 5, + 8, + 13, + 21, + 34, + ] + else: + assert json.loads(span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ + [5, 8, 13, 21, 34], + [8, 13, 21, 34, 55], + ] - assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 - assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 + assert span["data"]["gen_ai.usage.input_tokens"] == 20 + assert span["data"]["gen_ai.usage.total_tokens"] == 30 +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio @pytest.mark.parametrize( "send_default_pii, include_prompts", @@ -1684,14 +2528,19 @@ def test_embeddings_create(sentry_init, capture_items, input, request): ], ) 
async def test_embeddings_create_async_no_pii( - sentry_init, capture_items, send_default_pii, include_prompts + sentry_init, + capture_events, + capture_items, + send_default_pii, + include_prompts, + stream_gen_ai_spans, ): sentry_init( integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") client = AsyncOpenAI(api_key="z") @@ -1706,59 +2555,88 @@ async def test_embeddings_create_async_no_pii( ) client.embeddings._post = AsyncMock(return_value=returned_embedding) - with start_transaction(name="openai tx"): - response = await client.embeddings.create( - input="hello", model="text-embedding-3-large" + + if stream_gen_ai_spans: + items = capture_items("span") + + with start_transaction(name="openai tx"): + response = await client.embeddings.create( + input="hello", model="text-embedding-3-large" + ) + + assert len(response.data[0].embedding) == 3 + + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.embeddings" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert ( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] + == "text-embedding-3-large" ) - assert len(response.data[0].embedding) == 3 + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in span["attributes"] + + assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 + else: + events = capture_events() + + with start_transaction(name="openai tx"): + response = await client.embeddings.create( + input="hello", model="text-embedding-3-large" + ) + + assert len(response.data[0].embedding) == 3 - span = next(item.payload for item in items if item.type == "span") - assert span["attributes"]["sentry.op"] == "gen_ai.embeddings" - assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert 
span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "text-embedding-3-large" + tx = events[0] + assert tx["type"] == "transaction" + span = tx["spans"][0] + assert span["op"] == "gen_ai.embeddings" + assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "text-embedding-3-large" - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in span["attributes"] + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in span["data"] - assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 - assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 + assert span["data"]["gen_ai.usage.input_tokens"] == 20 + assert span["data"]["gen_ai.usage.total_tokens"] == 30 +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio @pytest.mark.parametrize( - "input", + "get_input", [ pytest.param( - "hello", + lambda: "hello", id="string", ), pytest.param( - ["First text", "Second text", "Third text"], + lambda: ["First text", "Second text", "Third text"], id="string_sequence", ), pytest.param( - iter(["First text", "Second text", "Third text"]), + lambda: iter(["First text", "Second text", "Third text"]), id="string_iterable", ), pytest.param( - [5, 8, 13, 21, 34], + lambda: [5, 8, 13, 21, 34], id="tokens", ), pytest.param( - iter( + lambda: iter( [5, 8, 13, 21, 34], ), id="token_iterable", ), pytest.param( - [ + lambda: [ [5, 8, 13, 21, 34], [8, 13, 21, 34, 55], ], id="tokens_sequence", ), pytest.param( - iter( + lambda: iter( [ [5, 8, 13, 21, 34], [8, 13, 21, 34, 55], @@ -1768,13 +2646,20 @@ async def test_embeddings_create_async_no_pii( ), ], ) -async def test_embeddings_create_async(sentry_init, capture_items, input, request): +async def test_embeddings_create_async( + sentry_init, + capture_events, + capture_items, + get_input, + request, + stream_gen_ai_spans, +): sentry_init( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + 
_experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") client = AsyncOpenAI(api_key="z") @@ -1789,60 +2674,131 @@ async def test_embeddings_create_async(sentry_init, capture_items, input, reques ) client.embeddings._post = AsyncMock(return_value=returned_embedding) - with start_transaction(name="openai tx"): - response = await client.embeddings.create( - input=input, model="text-embedding-3-large" - ) - assert len(response.data[0].embedding) == 3 + if stream_gen_ai_spans: + items = capture_items("span") - span = next(item.payload for item in items if item.type == "span") - assert span["attributes"]["sentry.op"] == "gen_ai.embeddings" - assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "text-embedding-3-large" + with start_transaction(name="openai tx"): + response = await client.embeddings.create( + input=get_input(), model="text-embedding-3-large" + ) - param_id = request.node.callspec.id - if param_id == "string": - assert json.loads(span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ - "hello" - ] - elif param_id == "string_sequence" or param_id == "string_iterable": - assert json.loads(span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ - "First text", - "Second text", - "Third text", - ] - elif param_id == "tokens" or param_id == "token_iterable": - assert json.loads(span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ - 5, - 8, - 13, - 21, - 34, - ] + assert len(response.data[0].embedding) == 3 + + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.embeddings" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert ( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] + == "text-embedding-3-large" + ) + + param_id = request.node.callspec.id + + if ( + "string" in param_id + and "string_sequence" not in param_id + and "string_iterable" not in 
param_id + ): + assert json.loads(span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ + "hello" + ] + elif "string_sequence" in param_id or "string_iterable" in param_id: + assert json.loads(span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ + "First text", + "Second text", + "Third text", + ] + elif ( + "tokens" in param_id or "token_iterable" in param_id + ) and "tokens_sequence" not in param_id: + assert json.loads(span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ + 5, + 8, + 13, + 21, + 34, + ] + else: + assert json.loads(span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ + [5, 8, 13, 21, 34], + [8, 13, 21, 34, 55], + ] + + assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 else: - assert json.loads(span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ - [5, 8, 13, 21, 34], - [8, 13, 21, 34, 55], - ] + events = capture_events() + + with start_transaction(name="openai tx"): + response = await client.embeddings.create( + input=get_input(), model="text-embedding-3-large" + ) + + assert len(response.data[0].embedding) == 3 + + tx = events[0] + assert tx["type"] == "transaction" + span = tx["spans"][0] + assert span["op"] == "gen_ai.embeddings" + assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "text-embedding-3-large" + + param_id = request.node.callspec.id + + if ( + "string" in param_id + and "string_sequence" not in param_id + and "string_iterable" not in param_id + ): + assert json.loads(span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ + "hello" + ] + elif "string_sequence" in param_id or "string_iterable" in param_id: + assert json.loads(span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ + "First text", + "Second text", + "Third text", + ] + elif ( + "tokens" in param_id or "token_iterable" in param_id + ) and "tokens_sequence" not in param_id: + assert 
json.loads(span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ + 5, + 8, + 13, + 21, + 34, + ] + else: + assert json.loads(span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ + [5, 8, 13, 21, 34], + [8, 13, 21, 34, 55], + ] - assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 - assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 + assert span["data"]["gen_ai.usage.input_tokens"] == 20 + assert span["data"]["gen_ai.usage.total_tokens"] == 30 +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( "send_default_pii, include_prompts", [(True, True), (True, False), (False, True), (False, False)], ) def test_embeddings_create_raises_error( - sentry_init, capture_items, send_default_pii, include_prompts + sentry_init, + capture_events, + capture_items, + send_default_pii, + include_prompts, + stream_gen_ai_spans, ): sentry_init( integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("event") client = OpenAI(api_key="z") @@ -1850,27 +2806,44 @@ def test_embeddings_create_raises_error( side_effect=OpenAIError("API rate limit reached") ) - with pytest.raises(OpenAIError): - client.embeddings.create(input="hello", model="text-embedding-3-large") + if stream_gen_ai_spans: + items = capture_items("event") + + with pytest.raises(OpenAIError): + client.embeddings.create(input="hello", model="text-embedding-3-large") + + (event,) = (item.payload for item in items if item.type == "event") + else: + events = capture_events() + + with pytest.raises(OpenAIError): + client.embeddings.create(input="hello", model="text-embedding-3-large") + + (event,) = events - (event,) = (item.payload for item in items if item.type == "event") assert event["level"] == "error" +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio @pytest.mark.parametrize( 
"send_default_pii, include_prompts", [(True, True), (True, False), (False, True), (False, False)], ) async def test_embeddings_create_raises_error_async( - sentry_init, capture_items, send_default_pii, include_prompts + sentry_init, + capture_events, + capture_items, + send_default_pii, + include_prompts, + stream_gen_ai_spans, ): sentry_init( integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("event") client = AsyncOpenAI(api_key="z") @@ -1878,21 +2851,41 @@ async def test_embeddings_create_raises_error_async( side_effect=OpenAIError("API rate limit reached") ) - with pytest.raises(OpenAIError): - await client.embeddings.create(input="hello", model="text-embedding-3-large") + if stream_gen_ai_spans: + items = capture_items("event") + + with pytest.raises(OpenAIError): + await client.embeddings.create( + input="hello", model="text-embedding-3-large" + ) + + (event,) = (item.payload for item in items if item.type == "event") + else: + events = capture_events() + + with pytest.raises(OpenAIError): + await client.embeddings.create( + input="hello", model="text-embedding-3-large" + ) + + (event,) = events - (event,) = (item.payload for item in items if item.type == "event") assert event["level"] == "error" +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_span_origin_nonstreaming_chat( - sentry_init, capture_items, nonstreaming_chat_completions_model_response + sentry_init, + capture_events, + capture_items, + nonstreaming_chat_completions_model_response, + stream_gen_ai_spans, ): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") client = OpenAI(api_key="z") client.chat.completions._post = mock.Mock( @@ -1909,27 +2902,47 @@ def 
test_span_origin_nonstreaming_chat( ) ) - with start_transaction(name="openai tx"): - client.chat.completions.create( - model="some-model", messages=[{"role": "system", "content": "hello"}] - ) + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with start_transaction(name="openai tx"): + client.chat.completions.create( + model="some-model", messages=[{"role": "system", "content": "hello"}] + ) + + (event,) = (item.payload for item in items if item.type == "transaction") + assert event["contexts"]["trace"]["origin"] == "manual" + + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["attributes"]["sentry.origin"] == "auto.ai.openai" + else: + events = capture_events() + + with start_transaction(name="openai tx"): + client.chat.completions.create( + model="some-model", messages=[{"role": "system", "content": "hello"}] + ) - (event,) = (item.payload for item in items if item.type == "transaction") - assert event["contexts"]["trace"]["origin"] == "manual" + (event,) = events - spans = [item.payload for item in items if item.type == "span"] - assert spans[0]["attributes"]["sentry.origin"] == "auto.ai.openai" + assert event["contexts"]["trace"]["origin"] == "manual" + assert event["spans"][0]["origin"] == "auto.ai.openai" +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_span_origin_nonstreaming_chat_async( - sentry_init, capture_items, nonstreaming_chat_completions_model_response + sentry_init, + capture_events, + capture_items, + nonstreaming_chat_completions_model_response, + stream_gen_ai_spans, ): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") client = AsyncOpenAI(api_key="z") client.chat.completions._post = AsyncMock( @@ -1946,24 +2959,45 @@ async def test_span_origin_nonstreaming_chat_async( ) ) - with 
start_transaction(name="openai tx"): - await client.chat.completions.create( - model="some-model", messages=[{"role": "system", "content": "hello"}] - ) + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with start_transaction(name="openai tx"): + await client.chat.completions.create( + model="some-model", messages=[{"role": "system", "content": "hello"}] + ) + + (event,) = (item.payload for item in items if item.type == "transaction") + assert event["contexts"]["trace"]["origin"] == "manual" + + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["attributes"]["sentry.origin"] == "auto.ai.openai" + else: + events = capture_events() - (event,) = (item.payload for item in items if item.type == "transaction") - assert event["contexts"]["trace"]["origin"] == "manual" + with start_transaction(name="openai tx"): + await client.chat.completions.create( + model="some-model", messages=[{"role": "system", "content": "hello"}] + ) + + (event,) = events - spans = [item.payload for item in items if item.type == "span"] - assert spans[0]["attributes"]["sentry.origin"] == "auto.ai.openai" + assert event["contexts"]["trace"]["origin"] == "manual" + assert event["spans"][0]["origin"] == "auto.ai.openai" -def test_span_origin_streaming_chat(sentry_init, capture_items): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_span_origin_streaming_chat( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") client = OpenAI(api_key="z") returned_stream = Stream(cast_to=None, response=None, client=client) @@ -2003,6 +3037,11 @@ def test_span_origin_streaming_chat(sentry_init, capture_items): ), ] + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + else: + events = capture_events() + 
client.chat.completions._post = mock.Mock(return_value=returned_stream) with start_transaction(name="openai tx"): response_stream = client.chat.completions.create( @@ -2011,22 +3050,33 @@ def test_span_origin_streaming_chat(sentry_init, capture_items): "".join(map(lambda x: x.choices[0].delta.content, response_stream)) - (event,) = (item.payload for item in items if item.type == "transaction") - assert event["contexts"]["trace"]["origin"] == "manual" + if stream_gen_ai_spans: + (event,) = (item.payload for item in items if item.type == "transaction") + assert event["contexts"]["trace"]["origin"] == "manual" + + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["attributes"]["sentry.origin"] == "auto.ai.openai" + else: + (event,) = events - spans = [item.payload for item in items if item.type == "span"] - assert spans[0]["attributes"]["sentry.origin"] == "auto.ai.openai" + assert event["contexts"]["trace"]["origin"] == "manual" + assert event["spans"][0]["origin"] == "auto.ai.openai" +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_span_origin_streaming_chat_async( - sentry_init, capture_items, async_iterator + sentry_init, + capture_events, + capture_items, + async_iterator, + stream_gen_ai_spans, ): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") client = AsyncOpenAI(api_key="z") returned_stream = AsyncStream(cast_to=None, response=None, client=client) @@ -2071,6 +3121,12 @@ async def test_span_origin_streaming_chat_async( ) client.chat.completions._post = AsyncMock(return_value=returned_stream) + + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + else: + events = capture_events() + with start_transaction(name="openai tx"): response_stream = await client.chat.completions.create( model="some-model", messages=[{"role": "system", 
"content": "hello"}] @@ -2080,19 +3136,31 @@ async def test_span_origin_streaming_chat_async( # "".join(map(lambda x: x.choices[0].delta.content, response_stream)) - (event,) = (item.payload for item in items if item.type == "transaction") - assert event["contexts"]["trace"]["origin"] == "manual" + if stream_gen_ai_spans: + (event,) = (item.payload for item in items if item.type == "transaction") + assert event["contexts"]["trace"]["origin"] == "manual" + + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["attributes"]["sentry.origin"] == "auto.ai.openai" + else: + (event,) = events - spans = [item.payload for item in items if item.type == "span"] - assert spans[0]["attributes"]["sentry.origin"] == "auto.ai.openai" + assert event["contexts"]["trace"]["origin"] == "manual" + assert event["spans"][0]["origin"] == "auto.ai.openai" -def test_span_origin_embeddings(sentry_init, capture_items): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_span_origin_embeddings( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") client = OpenAI(api_key="z") @@ -2107,23 +3175,41 @@ def test_span_origin_embeddings(sentry_init, capture_items): ) client.embeddings._post = mock.Mock(return_value=returned_embedding) + + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + else: + events = capture_events() + with start_transaction(name="openai tx"): client.embeddings.create(input="hello", model="text-embedding-3-large") - (event,) = [item.payload for item in items if item.type == "transaction"] - assert event["contexts"]["trace"]["origin"] == "manual" + if stream_gen_ai_spans: + (event,) = [item.payload for item in items if item.type == "transaction"] + assert event["contexts"]["trace"]["origin"] == "manual" 
+ + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["attributes"]["sentry.origin"] == "auto.ai.openai" + else: + (event,) = events - spans = [item.payload for item in items if item.type == "span"] - assert spans[0]["attributes"]["sentry.origin"] == "auto.ai.openai" + assert event["contexts"]["trace"]["origin"] == "manual" + assert event["spans"][0]["origin"] == "auto.ai.openai" +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio -async def test_span_origin_embeddings_async(sentry_init, capture_items): +async def test_span_origin_embeddings_async( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") client = AsyncOpenAI(api_key="z") @@ -2138,14 +3224,26 @@ async def test_span_origin_embeddings_async(sentry_init, capture_items): ) client.embeddings._post = AsyncMock(return_value=returned_embedding) + + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + else: + events = capture_events() + with start_transaction(name="openai tx"): await client.embeddings.create(input="hello", model="text-embedding-3-large") - (event,) = [item.payload for item in items if item.type == "transaction"] - assert event["contexts"]["trace"]["origin"] == "manual" + if stream_gen_ai_spans: + (event,) = [item.payload for item in items if item.type == "transaction"] + assert event["contexts"]["trace"]["origin"] == "manual" + + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["attributes"]["sentry.origin"] == "auto.ai.openai" + else: + (event,) = events - spans = [item.payload for item in items if item.type == "span"] - assert spans[0]["attributes"]["sentry.origin"] == "auto.ai.openai" + assert event["contexts"]["trace"]["origin"] == "manual" + assert 
event["spans"][0]["origin"] == "auto.ai.openai" def test_completions_token_usage_from_response(): @@ -2510,61 +3608,111 @@ def count_tokens(msg): ) +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") -def test_ai_client_span_responses_api_no_pii(sentry_init, capture_items): +def test_ai_client_span_responses_api_no_pii( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") client = OpenAI(api_key="z") client.responses._post = mock.Mock(return_value=EXAMPLE_RESPONSE) - with start_transaction(name="openai tx"): - client.responses.create( - model="gpt-4o", - instructions="You are a coding assistant that talks like a pirate.", - input="How do I check if a Python object is an instance of a class?", - max_output_tokens=100, - temperature=0.7, - top_p=0.9, - ) + if stream_gen_ai_spans: + items = capture_items("span") + + with start_transaction(name="openai tx"): + client.responses.create( + model="gpt-4o", + instructions="You are a coding assistant that talks like a pirate.", + input="How do I check if a Python object is an instance of a class?", + max_output_tokens=100, + temperature=0.7, + top_p=0.9, + ) - spans = [item.payload for item in items if item.type == "span"] - - assert len(spans) == 1 - assert spans[0]["attributes"] == { - "gen_ai.operation.name": "responses", - "gen_ai.request.max_tokens": 100, - "gen_ai.request.temperature": 0.7, - "gen_ai.request.top_p": 0.9, - "gen_ai.request.model": "gpt-4o", - "gen_ai.response.model": "response-model-id", - "gen_ai.response.streaming": False, - "gen_ai.system": "openai", - "gen_ai.usage.input_tokens": 20, - "gen_ai.usage.input_tokens.cached": 5, - "gen_ai.usage.output_tokens": 10, - "gen_ai.usage.output_tokens.reasoning": 8, - 
"gen_ai.usage.total_tokens": 30, - "sentry.environment": "production", - "sentry.op": "gen_ai.responses", - "sentry.origin": "auto.ai.openai", - "sentry.release": mock.ANY, - "sentry.sdk.name": "sentry.python", - "sentry.sdk.version": mock.ANY, - "sentry.segment.id": mock.ANY, - "sentry.segment.name": "openai tx", - "thread.id": mock.ANY, - "thread.name": mock.ANY, - } - - assert "gen_ai.system_instructions" not in spans[0]["attributes"] - assert "gen_ai.request.messages" not in spans[0]["attributes"] - assert "gen_ai.response.text" not in spans[0]["attributes"] + spans = [item.payload for item in items if item.type == "span"] + + assert len(spans) == 1 + assert spans[0]["attributes"] == { + "gen_ai.operation.name": "responses", + "gen_ai.request.max_tokens": 100, + "gen_ai.request.temperature": 0.7, + "gen_ai.request.top_p": 0.9, + "gen_ai.request.model": "gpt-4o", + "gen_ai.response.model": "response-model-id", + "gen_ai.response.streaming": False, + "gen_ai.system": "openai", + "gen_ai.usage.input_tokens": 20, + "gen_ai.usage.input_tokens.cached": 5, + "gen_ai.usage.output_tokens": 10, + "gen_ai.usage.output_tokens.reasoning": 8, + "gen_ai.usage.total_tokens": 30, + "sentry.environment": "production", + "sentry.op": "gen_ai.responses", + "sentry.origin": "auto.ai.openai", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": mock.ANY, + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "openai tx", + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } + + assert "gen_ai.system_instructions" not in spans[0]["attributes"] + assert "gen_ai.request.messages" not in spans[0]["attributes"] + assert "gen_ai.response.text" not in spans[0]["attributes"] + else: + events = capture_events() + with start_transaction(name="openai tx"): + client.responses.create( + model="gpt-4o", + instructions="You are a coding assistant that talks like a pirate.", + input="How do I check if a Python object is an instance of a class?", + 
max_output_tokens=100, + temperature=0.7, + top_p=0.9, + ) + (transaction,) = events + spans = transaction["spans"] + + assert len(spans) == 1 + assert spans[0]["op"] == "gen_ai.responses" + assert spans[0]["origin"] == "auto.ai.openai" + assert spans[0]["data"] == { + "gen_ai.operation.name": "responses", + "gen_ai.request.max_tokens": 100, + "gen_ai.request.temperature": 0.7, + "gen_ai.request.top_p": 0.9, + "gen_ai.request.model": "gpt-4o", + "gen_ai.response.model": "response-model-id", + "gen_ai.response.streaming": False, + "gen_ai.system": "openai", + "gen_ai.usage.input_tokens": 20, + "gen_ai.usage.input_tokens.cached": 5, + "gen_ai.usage.output_tokens": 10, + "gen_ai.usage.output_tokens.reasoning": 8, + "gen_ai.usage.total_tokens": 30, + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } + + assert "gen_ai.system_instructions" not in spans[0]["data"] + assert "gen_ai.request.messages" not in spans[0]["data"] + assert "gen_ai.response.text" not in spans[0]["data"] + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( "instructions", ( @@ -2631,58 +3779,105 @@ def test_ai_client_span_responses_api_no_pii(sentry_init, capture_items): ) @pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") def test_ai_client_span_responses_api( - sentry_init, capture_items, instructions, input, request + sentry_init, + capture_events, + capture_items, + instructions, + input, + request, + stream_gen_ai_spans, ): sentry_init( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") client = OpenAI(api_key="z") client.responses._post = mock.Mock(return_value=EXAMPLE_RESPONSE) - with start_transaction(name="openai tx"): - client.responses.create( - model="gpt-4o", - instructions=instructions, - input=input, - max_output_tokens=100, - temperature=0.7, - top_p=0.9, - ) + if 
stream_gen_ai_spans: + items = capture_items("span") + + with start_transaction(name="openai tx"): + client.responses.create( + model="gpt-4o", + instructions=instructions, + input=input, + max_output_tokens=100, + temperature=0.7, + top_p=0.9, + ) + + spans = [item.payload for item in items if item.type == "span"] + + assert len(spans) == 1 + + expected_data = { + "gen_ai.operation.name": "responses", + "gen_ai.request.max_tokens": 100, + "gen_ai.request.temperature": 0.7, + "gen_ai.request.top_p": 0.9, + "gen_ai.system": "openai", + "gen_ai.response.model": "response-model-id", + "gen_ai.response.streaming": False, + "gen_ai.usage.input_tokens": 20, + "gen_ai.usage.input_tokens.cached": 5, + "gen_ai.usage.output_tokens": 10, + "gen_ai.usage.output_tokens.reasoning": 8, + "gen_ai.usage.total_tokens": 30, + "gen_ai.request.model": "gpt-4o", + "gen_ai.response.text": "the model response", + "sentry.environment": "production", + "sentry.op": "gen_ai.responses", + "sentry.origin": "auto.ai.openai", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": mock.ANY, + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "openai tx", + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } + else: + events = capture_events() + + with start_transaction(name="openai tx"): + client.responses.create( + model="gpt-4o", + instructions=instructions, + input=input, + max_output_tokens=100, + temperature=0.7, + top_p=0.9, + ) - spans = [item.payload for item in items if item.type == "span"] - - assert len(spans) == 1 - - expected_data = { - "gen_ai.operation.name": "responses", - "gen_ai.request.max_tokens": 100, - "gen_ai.request.temperature": 0.7, - "gen_ai.request.top_p": 0.9, - "gen_ai.system": "openai", - "gen_ai.response.model": "response-model-id", - "gen_ai.response.streaming": False, - "gen_ai.usage.input_tokens": 20, - "gen_ai.usage.input_tokens.cached": 5, - "gen_ai.usage.output_tokens": 10, - "gen_ai.usage.output_tokens.reasoning": 
8, - "gen_ai.usage.total_tokens": 30, - "gen_ai.request.model": "gpt-4o", - "gen_ai.response.text": "the model response", - "sentry.environment": "production", - "sentry.op": "gen_ai.responses", - "sentry.origin": "auto.ai.openai", - "sentry.release": mock.ANY, - "sentry.sdk.name": "sentry.python", - "sentry.sdk.version": mock.ANY, - "sentry.segment.id": mock.ANY, - "sentry.segment.name": "openai tx", - "thread.id": mock.ANY, - "thread.name": mock.ANY, - } + (transaction,) = events + spans = transaction["spans"] + + assert len(spans) == 1 + assert spans[0]["op"] == "gen_ai.responses" + assert spans[0]["origin"] == "auto.ai.openai" + + expected_data = { + "gen_ai.operation.name": "responses", + "gen_ai.request.max_tokens": 100, + "gen_ai.request.temperature": 0.7, + "gen_ai.request.top_p": 0.9, + "gen_ai.system": "openai", + "gen_ai.response.model": "response-model-id", + "gen_ai.response.streaming": False, + "gen_ai.usage.input_tokens": 20, + "gen_ai.usage.input_tokens.cached": 5, + "gen_ai.usage.output_tokens": 10, + "gen_ai.usage.output_tokens.reasoning": 8, + "gen_ai.usage.total_tokens": 30, + "gen_ai.request.model": "gpt-4o", + "gen_ai.response.text": "the model response", + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } param_id = request.node.callspec.id if "string" in param_id and ( @@ -2838,9 +4033,13 @@ def test_ai_client_span_responses_api( } ) - assert spans[0]["attributes"] == expected_data + if stream_gen_ai_spans: + assert spans[0]["attributes"] == expected_data + else: + assert spans[0]["data"] == expected_data +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( "conversation, expected_id", [ @@ -2852,71 +4051,125 @@ def test_ai_client_span_responses_api( ) @pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") def test_responses_api_conversation_id( - sentry_init, capture_items, conversation, expected_id + sentry_init, + capture_events, + capture_items, + conversation, + 
expected_id, + stream_gen_ai_spans, ): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") client = OpenAI(api_key="z") client.responses._post = mock.Mock(return_value=EXAMPLE_RESPONSE) - with start_transaction(name="openai tx"): - client.responses.create( - model="gpt-4o", - input="hello", - conversation=conversation, - ) + if stream_gen_ai_spans: + items = capture_items("span") + + with start_transaction(name="openai tx"): + client.responses.create( + model="gpt-4o", + input="hello", + conversation=conversation, + ) - (span,) = (item.payload for item in items if item.type == "span") + (span,) = (item.payload for item in items if item.type == "span") - if expected_id is None: - assert "gen_ai.conversation.id" not in span["attributes"] + if expected_id is None: + assert "gen_ai.conversation.id" not in span["attributes"] + else: + assert span["attributes"]["gen_ai.conversation.id"] == expected_id else: - assert span["attributes"]["gen_ai.conversation.id"] == expected_id + events = capture_events() + + with start_transaction(name="openai tx"): + client.responses.create( + model="gpt-4o", + input="hello", + conversation=conversation, + ) + + (transaction,) = events + (span,) = transaction["spans"] + + if expected_id is None: + assert "gen_ai.conversation.id" not in span["data"] + else: + assert span["data"]["gen_ai.conversation.id"] == expected_id +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") -def test_error_in_responses_api(sentry_init, capture_items): +def test_error_in_responses_api( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): sentry_init( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = 
capture_items("event", "transaction", "span") client = OpenAI(api_key="z") client.responses._post = mock.Mock( side_effect=OpenAIError("API rate limit reached") ) - with start_transaction(name="openai tx"): - with pytest.raises(OpenAIError): + if stream_gen_ai_spans: + items = capture_items("event", "transaction", "span") + + with start_transaction(name="openai tx"), pytest.raises(OpenAIError): client.responses.create( model="gpt-4o", instructions="You are a coding assistant that talks like a pirate.", input="How do I check if a Python object is an instance of a class?", ) - # make sure the span where the error occurred is captured - spans = [item.payload for item in items if item.type == "span"] - assert spans[0]["attributes"]["sentry.op"] == "gen_ai.responses" + # make sure the span where the error occurred is captured + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["attributes"]["sentry.op"] == "gen_ai.responses" - (error_event,) = (item.payload for item in items if item.type == "event") - assert error_event["level"] == "error" - assert error_event["exception"]["values"][0]["type"] == "OpenAIError" + (error_event,) = (item.payload for item in items if item.type == "event") + + assert error_event["level"] == "error" + assert error_event["exception"]["values"][0]["type"] == "OpenAIError" + + (transaction_event,) = ( + item.payload for item in items if item.type == "transaction" + ) + else: + events = capture_events() + + with start_transaction(name="openai tx"), pytest.raises(OpenAIError): + client.responses.create( + model="gpt-4o", + instructions="You are a coding assistant that talks like a pirate.", + input="How do I check if a Python object is an instance of a class?", + ) + + (error_event, transaction_event) = events + + assert transaction_event["type"] == "transaction" + # make sure the span where the error occurred is captured + assert transaction_event["spans"][0]["op"] == "gen_ai.responses" + + assert 
error_event["level"] == "error" + assert error_event["exception"]["values"][0]["type"] == "OpenAIError" - (transaction_event,) = ( - item.payload for item in items if item.type == "transaction" - ) assert ( error_event["contexts"]["trace"]["trace_id"] == transaction_event["contexts"]["trace"]["trace_id"] ) +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio @pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") @pytest.mark.parametrize( @@ -2984,59 +4237,107 @@ def test_error_in_responses_api(sentry_init, capture_items): ], ) async def test_ai_client_span_responses_async_api( - sentry_init, capture_items, instructions, input, request + sentry_init, + capture_events, + capture_items, + instructions, + input, + request, + stream_gen_ai_spans, ): sentry_init( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") client = AsyncOpenAI(api_key="z") client.responses._post = AsyncMock(return_value=EXAMPLE_RESPONSE) - with start_transaction(name="openai tx"): - await client.responses.create( - model="gpt-4o", - instructions=instructions, - input=input, - max_output_tokens=100, - temperature=0.7, - top_p=0.9, - ) + if stream_gen_ai_spans: + items = capture_items("span") + + with start_transaction(name="openai tx"): + await client.responses.create( + model="gpt-4o", + instructions=instructions, + input=input, + max_output_tokens=100, + temperature=0.7, + top_p=0.9, + ) + + spans = [item.payload for item in items if item.type == "span"] + + assert len(spans) == 1 + + expected_data = { + "gen_ai.operation.name": "responses", + "gen_ai.request.max_tokens": 100, + "gen_ai.request.temperature": 0.7, + "gen_ai.request.top_p": 0.9, + "gen_ai.request.messages": '["How do I check if a Python object is an instance of a class?"]', + "gen_ai.request.model": "gpt-4o", + 
"gen_ai.response.model": "response-model-id", + "gen_ai.response.streaming": False, + "gen_ai.system": "openai", + "gen_ai.usage.input_tokens": 20, + "gen_ai.usage.input_tokens.cached": 5, + "gen_ai.usage.output_tokens": 10, + "gen_ai.usage.output_tokens.reasoning": 8, + "gen_ai.usage.total_tokens": 30, + "gen_ai.response.text": "the model response", + "sentry.environment": "production", + "sentry.op": "gen_ai.responses", + "sentry.origin": "auto.ai.openai", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": mock.ANY, + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "openai tx", + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } + else: + events = capture_events() + + with start_transaction(name="openai tx"): + await client.responses.create( + model="gpt-4o", + instructions=instructions, + input=input, + max_output_tokens=100, + temperature=0.7, + top_p=0.9, + ) - spans = [item.payload for item in items if item.type == "span"] - - assert len(spans) == 1 - - expected_data = { - "gen_ai.operation.name": "responses", - "gen_ai.request.max_tokens": 100, - "gen_ai.request.temperature": 0.7, - "gen_ai.request.top_p": 0.9, - "gen_ai.request.messages": '["How do I check if a Python object is an instance of a class?"]', - "gen_ai.request.model": "gpt-4o", - "gen_ai.response.model": "response-model-id", - "gen_ai.response.streaming": False, - "gen_ai.system": "openai", - "gen_ai.usage.input_tokens": 20, - "gen_ai.usage.input_tokens.cached": 5, - "gen_ai.usage.output_tokens": 10, - "gen_ai.usage.output_tokens.reasoning": 8, - "gen_ai.usage.total_tokens": 30, - "gen_ai.response.text": "the model response", - "sentry.environment": "production", - "sentry.op": "gen_ai.responses", - "sentry.origin": "auto.ai.openai", - "sentry.release": mock.ANY, - "sentry.sdk.name": "sentry.python", - "sentry.sdk.version": mock.ANY, - "sentry.segment.id": mock.ANY, - "sentry.segment.name": "openai tx", - "thread.id": mock.ANY, - 
"thread.name": mock.ANY, - } + (transaction,) = events + spans = transaction["spans"] + + assert len(spans) == 1 + assert spans[0]["op"] == "gen_ai.responses" + assert spans[0]["origin"] == "auto.ai.openai" + + expected_data = { + "gen_ai.operation.name": "responses", + "gen_ai.request.max_tokens": 100, + "gen_ai.request.temperature": 0.7, + "gen_ai.request.top_p": 0.9, + "gen_ai.request.messages": '["How do I check if a Python object is an instance of a class?"]', + "gen_ai.request.model": "gpt-4o", + "gen_ai.response.model": "response-model-id", + "gen_ai.response.streaming": False, + "gen_ai.system": "openai", + "gen_ai.usage.input_tokens": 20, + "gen_ai.usage.input_tokens.cached": 5, + "gen_ai.usage.output_tokens": 10, + "gen_ai.usage.output_tokens.reasoning": 8, + "gen_ai.usage.total_tokens": 30, + "gen_ai.response.text": "the model response", + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } param_id = request.node.callspec.id if "string" in param_id and ( @@ -3192,9 +4493,13 @@ async def test_ai_client_span_responses_async_api( } ) - assert spans[0]["attributes"] == expected_data + if stream_gen_ai_spans: + assert spans[0]["attributes"] == expected_data + else: + assert spans[0]["data"] == expected_data +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio @pytest.mark.parametrize( "instructions", @@ -3263,6 +4568,7 @@ async def test_ai_client_span_responses_async_api( @pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") async def test_ai_client_span_streaming_responses_async_api( sentry_init, + capture_events, capture_items, instructions, input, @@ -3270,25 +4576,28 @@ async def test_ai_client_span_streaming_responses_async_api( get_model_response, async_iterator, server_side_event_chunks, + stream_gen_ai_spans, ): sentry_init( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - 
items = capture_items("span") client = AsyncOpenAI(api_key="z") returned_stream = get_model_response( async_iterator(server_side_event_chunks(EXAMPLE_RESPONSES_STREAM)) ) - with mock.patch.object( - client.responses._client._client, - "send", - return_value=returned_stream, - ): - with start_transaction(name="openai tx"): + if stream_gen_ai_spans: + items = capture_items("span") + + with mock.patch.object( + client.responses._client._client, + "send", + return_value=returned_stream, + ), start_transaction(name="openai tx"): result = await client.responses.create( model="gpt-4o", instructions=instructions, @@ -3301,40 +4610,89 @@ async def test_ai_client_span_streaming_responses_async_api( async for _ in result: pass - spans = [item.payload for item in items if item.type == "span"] - spans = [ - span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_RESPONSES - ] + spans = [item.payload for item in items if item.type == "span"] + spans = [ + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_RESPONSES + ] + + assert len(spans) == 1 + + expected_data = { + "gen_ai.operation.name": "responses", + "gen_ai.request.max_tokens": 100, + "gen_ai.request.temperature": 0.7, + "gen_ai.request.top_p": 0.9, + "gen_ai.response.model": "response-model-id", + "gen_ai.response.streaming": True, + "gen_ai.system": "openai", + "gen_ai.response.time_to_first_token": mock.ANY, + "gen_ai.usage.input_tokens": 20, + "gen_ai.usage.input_tokens.cached": 5, + "gen_ai.usage.output_tokens": 10, + "gen_ai.usage.output_tokens.reasoning": 8, + "gen_ai.usage.total_tokens": 30, + "gen_ai.request.model": "gpt-4o", + "gen_ai.response.text": "hello world", + "sentry.environment": "production", + "sentry.op": "gen_ai.responses", + "sentry.origin": "auto.ai.openai", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": mock.ANY, + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "openai tx", + "thread.id": mock.ANY, + 
"thread.name": mock.ANY, + } + else: + events = capture_events() + + with mock.patch.object( + client.responses._client._client, + "send", + return_value=returned_stream, + ), start_transaction(name="openai tx"): + result = await client.responses.create( + model="gpt-4o", + instructions=instructions, + input=input, + stream=True, + max_output_tokens=100, + temperature=0.7, + top_p=0.9, + ) + async for _ in result: + pass + + (transaction,) = events + spans = [ + span for span in transaction["spans"] if span["op"] == OP.GEN_AI_RESPONSES + ] - assert len(spans) == 1 - - expected_data = { - "gen_ai.operation.name": "responses", - "gen_ai.request.max_tokens": 100, - "gen_ai.request.temperature": 0.7, - "gen_ai.request.top_p": 0.9, - "gen_ai.response.model": "response-model-id", - "gen_ai.response.streaming": True, - "gen_ai.system": "openai", - "gen_ai.response.time_to_first_token": mock.ANY, - "gen_ai.usage.input_tokens": 20, - "gen_ai.usage.input_tokens.cached": 5, - "gen_ai.usage.output_tokens": 10, - "gen_ai.usage.output_tokens.reasoning": 8, - "gen_ai.usage.total_tokens": 30, - "gen_ai.request.model": "gpt-4o", - "gen_ai.response.text": "hello world", - "sentry.environment": "production", - "sentry.op": "gen_ai.responses", - "sentry.origin": "auto.ai.openai", - "sentry.release": mock.ANY, - "sentry.sdk.name": "sentry.python", - "sentry.sdk.version": mock.ANY, - "sentry.segment.id": mock.ANY, - "sentry.segment.name": "openai tx", - "thread.id": mock.ANY, - "thread.name": mock.ANY, - } + assert len(spans) == 1 + assert spans[0]["origin"] == "auto.ai.openai" + + expected_data = { + "gen_ai.operation.name": "responses", + "gen_ai.request.max_tokens": 100, + "gen_ai.request.temperature": 0.7, + "gen_ai.request.top_p": 0.9, + "gen_ai.response.model": "response-model-id", + "gen_ai.response.streaming": True, + "gen_ai.system": "openai", + "gen_ai.response.time_to_first_token": mock.ANY, + "gen_ai.usage.input_tokens": 20, + "gen_ai.usage.input_tokens.cached": 5, + 
"gen_ai.usage.output_tokens": 10, + "gen_ai.usage.output_tokens.reasoning": 8, + "gen_ai.usage.total_tokens": 30, + "gen_ai.request.model": "gpt-4o", + "gen_ai.response.text": "hello world", + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } param_id = request.node.callspec.id if "string" in param_id and ( @@ -3490,43 +4848,74 @@ async def test_ai_client_span_streaming_responses_async_api( } ) - assert spans[0]["attributes"] == expected_data + if stream_gen_ai_spans: + assert spans[0]["attributes"] == expected_data + else: + assert spans[0]["data"] == expected_data +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio @pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") -async def test_error_in_responses_async_api(sentry_init, capture_items): +async def test_error_in_responses_async_api( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): sentry_init( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("event", "transaction", "span") client = AsyncOpenAI(api_key="z") client.responses._post = AsyncMock( side_effect=OpenAIError("API rate limit reached") ) - with start_transaction(name="openai tx"): - with pytest.raises(OpenAIError): + if stream_gen_ai_spans: + items = capture_items("event", "transaction", "span") + + with start_transaction(name="openai tx"), pytest.raises(OpenAIError): await client.responses.create( model="gpt-4o", instructions="You are a coding assistant that talks like a pirate.", input="How do I check if a Python object is an instance of a class?", ) - # make sure the span where the error occurred is captured - spans = [item.payload for item in items if item.type == "span"] - assert spans[0]["attributes"]["sentry.op"] == "gen_ai.responses" + # make sure the span where the error occurred is captured + spans = [item.payload for 
item in items if item.type == "span"] + assert spans[0]["attributes"]["sentry.op"] == "gen_ai.responses" - (error_event,) = (item.payload for item in items if item.type == "event") - assert error_event["level"] == "error" - assert error_event["exception"]["values"][0]["type"] == "OpenAIError" + (error_event,) = (item.payload for item in items if item.type == "event") + + assert error_event["level"] == "error" + assert error_event["exception"]["values"][0]["type"] == "OpenAIError" + + (transaction_event,) = ( + item.payload for item in items if item.type == "transaction" + ) + else: + events = capture_events() + + with start_transaction(name="openai tx"), pytest.raises(OpenAIError): + await client.responses.create( + model="gpt-4o", + instructions="You are a coding assistant that talks like a pirate.", + input="How do I check if a Python object is an instance of a class?", + ) + + (error_event, transaction_event) = events + + assert transaction_event["type"] == "transaction" + # make sure the span where the error occurred is captured + assert transaction_event["spans"][0]["op"] == "gen_ai.responses" + + assert error_event["level"] == "error" + assert error_event["exception"]["values"][0]["type"] == "OpenAIError" - (transaction_event,) = ( - item.payload for item in items if item.type == "transaction" - ) assert ( error_event["contexts"]["trace"]["trace_id"] == transaction_event["contexts"]["trace"]["trace_id"] @@ -3606,6 +4995,7 @@ async def test_error_in_responses_async_api(sentry_init, capture_items): ] +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( "send_default_pii, include_prompts", [(True, True), (True, False), (False, True), (False, False)], @@ -3613,11 +5003,13 @@ async def test_error_in_responses_async_api(sentry_init, capture_items): @pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") def test_streaming_responses_api( sentry_init, + capture_events, capture_items, send_default_pii, 
include_prompts, get_model_response, server_side_event_chunks, + stream_gen_ai_spans, ): sentry_init( integrations=[ @@ -3627,8 +5019,8 @@ def test_streaming_responses_api( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") client = OpenAI(api_key="z") returned_stream = get_model_response( @@ -3637,12 +5029,14 @@ def test_streaming_responses_api( ) ) - with mock.patch.object( - client.responses._client._client, - "send", - return_value=returned_stream, - ): - with start_transaction(name="openai tx"): + if stream_gen_ai_spans: + items = capture_items("span") + + with mock.patch.object( + client.responses._client._client, + "send", + return_value=returned_stream, + ), start_transaction(name="openai tx"): response_stream = client.responses.create( model="some-model", input="hello", @@ -3657,29 +5051,74 @@ def test_streaming_responses_api( if hasattr(item, "delta"): response_string += item.delta - assert response_string == "hello world" + assert response_string == "hello world" + + (span,) = (item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.responses" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 - (span,) = (item.payload for item in items if item.type == "span") - assert span["attributes"]["sentry.op"] == "gen_ai.responses" - assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "response-model-id" - assert 
span["attributes"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "response-model-id" + if send_default_pii and include_prompts: + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '["hello"]' + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "hello world" + else: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - if send_default_pii and include_prompts: - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '["hello"]' - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "hello world" + assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 + assert span["attributes"]["gen_ai.usage.output_tokens"] == 10 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] + events = capture_events() + + with mock.patch.object( + client.responses._client._client, + "send", + return_value=returned_stream, + ), start_transaction(name="openai tx"): + response_stream = client.responses.create( + model="some-model", + input="hello", + stream=True, + max_output_tokens=100, + temperature=0.7, + top_p=0.9, + ) + + response_string = "" + for item in response_stream: + if hasattr(item, "delta"): + response_string += item.delta + + assert response_string == "hello world" + + (transaction,) = events + (span,) = transaction["spans"] + assert span["op"] == "gen_ai.responses" + assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["data"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["data"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert span["data"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + + assert span["data"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "response-model-id" - assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 - assert span["attributes"]["gen_ai.usage.output_tokens"] == 10 - assert 
span["attributes"]["gen_ai.usage.total_tokens"] == 30 + if send_default_pii and include_prompts: + assert span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '["hello"]' + assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "hello world" + else: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + + assert span["data"]["gen_ai.usage.input_tokens"] == 20 + assert span["data"]["gen_ai.usage.output_tokens"] == 10 + assert span["data"]["gen_ai.usage.total_tokens"] == 30 +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio @pytest.mark.parametrize( "send_default_pii, include_prompts", @@ -3688,12 +5127,14 @@ def test_streaming_responses_api( @pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") async def test_streaming_responses_api_async( sentry_init, + capture_events, capture_items, send_default_pii, include_prompts, get_model_response, async_iterator, server_side_event_chunks, + stream_gen_ai_spans, ): sentry_init( integrations=[ @@ -3703,20 +5144,22 @@ async def test_streaming_responses_api_async( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") client = AsyncOpenAI(api_key="z") returned_stream = get_model_response( async_iterator(server_side_event_chunks(EXAMPLE_RESPONSES_STREAM)) ) - with mock.patch.object( - client.responses._client._client, - "send", - return_value=returned_stream, - ): - with start_transaction(name="openai tx"): + if stream_gen_ai_spans: + items = capture_items("span") + + with mock.patch.object( + client.responses._client._client, + "send", + return_value=returned_stream, + ), start_transaction(name="openai tx"): response_stream = await client.responses.create( model="some-model", input="hello", @@ -3731,29 +5174,74 @@ async def test_streaming_responses_api_async( if hasattr(item, "delta"): response_string += 
item.delta - assert response_string == "hello world" + assert response_string == "hello world" - (span,) = (item.payload for item in items if item.type == "span") - assert span["attributes"]["sentry.op"] == "gen_ai.responses" - assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + (span,) = (item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.responses" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "response-model-id" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "response-model-id" - if send_default_pii and include_prompts: - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '["hello"]' - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "hello world" + if send_default_pii and include_prompts: + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '["hello"]' + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "hello world" + else: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] + + assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 + assert span["attributes"]["gen_ai.usage.output_tokens"] == 10 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] + events = capture_events() + + with mock.patch.object( + 
client.responses._client._client, + "send", + return_value=returned_stream, + ), start_transaction(name="openai tx"): + response_stream = await client.responses.create( + model="some-model", + input="hello", + stream=True, + max_output_tokens=100, + temperature=0.7, + top_p=0.9, + ) + + response_string = "" + async for item in response_stream: + if hasattr(item, "delta"): + response_string += item.delta + + assert response_string == "hello world" + + (transaction,) = events + (span,) = transaction["spans"] + assert span["op"] == "gen_ai.responses" + assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["data"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["data"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert span["data"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 - assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 - assert span["attributes"]["gen_ai.usage.output_tokens"] == 10 - assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 + assert span["data"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "response-model-id" + if send_default_pii and include_prompts: + assert span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '["hello"]' + assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "hello world" + else: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + + assert span["data"]["gen_ai.usage.input_tokens"] == 20 + assert span["data"]["gen_ai.usage.output_tokens"] == 10 + assert span["data"]["gen_ai.usage.total_tokens"] == 30 + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.skipif( OPENAI_VERSION <= (1, 1, 0), reason="OpenAI versions <=1.1.0 do not support the tools parameter.", @@ -3763,13 +5251,18 @@ async def test_streaming_responses_api_async( [[], None, NOT_GIVEN, omit], ) def test_empty_tools_in_chat_completion( - sentry_init, capture_items, tools, nonstreaming_chat_completions_model_response + sentry_init, + capture_events, 
+ capture_items, + tools, + nonstreaming_chat_completions_model_response, + stream_gen_ai_spans, ): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") client = OpenAI(api_key="z") client.chat.completions._post = mock.Mock( @@ -3786,19 +5279,37 @@ def test_empty_tools_in_chat_completion( ) ) - with start_transaction(name="openai tx"): - client.chat.completions.create( - model="some-model", - messages=[{"role": "system", "content": "hello"}], - tools=tools, - ) + if stream_gen_ai_spans: + items = capture_items("span") - span = next(item.payload for item in items if item.type == "span") + with start_transaction(name="openai tx"): + client.chat.completions.create( + model="some-model", + messages=[{"role": "system", "content": "hello"}], + tools=tools, + ) + + span = next(item.payload for item in items if item.type == "span") + + assert "gen_ai.request.available_tools" not in span["attributes"] + else: + events = capture_events() + + with start_transaction(name="openai tx"): + client.chat.completions.create( + model="some-model", + messages=[{"role": "system", "content": "hello"}], + tools=tools, + ) - assert "gen_ai.request.available_tools" not in span["attributes"] + (event,) = events + span = event["spans"][0] + + assert "gen_ai.request.available_tools" not in span["data"] # Test messages with mixed roles including "ai" that should be mapped to "assistant" +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( "test_message,expected_role", [ @@ -3815,10 +5326,12 @@ def test_empty_tools_in_chat_completion( ) def test_openai_message_role_mapping( sentry_init, + capture_events, capture_items, test_message, expected_role, nonstreaming_chat_completions_model_response, + stream_gen_ai_spans, ): """Test that OpenAI integration properly maps message roles like 'ai' to 'assistant'""" @@ -3826,8 +5339,8 @@ def 
test_openai_message_role_mapping( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") client = OpenAI(api_key="z") client.chat.completions._post = mock.Mock( @@ -3846,32 +5359,53 @@ def test_openai_message_role_mapping( test_messages = [test_message] - with start_transaction(name="openai tx"): - client.chat.completions.create(model="test-model", messages=test_messages) - # Verify that the span was created correctly - span = next(item.payload for item in items if item.type == "span") - assert span["attributes"]["sentry.op"] == "gen_ai.chat" - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] + if stream_gen_ai_spans: + items = capture_items("span") + + with start_transaction(name="openai tx"): + client.chat.completions.create(model="test-model", messages=test_messages) - # Parse the stored messages - import json + # Verify that the span was created correctly + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] - stored_messages = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + stored_messages = json.loads( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) + else: + events = capture_events() + + with start_transaction(name="openai tx"): + client.chat.completions.create(model="test-model", messages=test_messages) + + # Verify that the span was created correctly + (event,) = events + span = event["spans"][0] + assert span["op"] == "gen_ai.chat" + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] + + stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(stored_messages) == 1 assert stored_messages[0]["role"] == expected_role +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def 
test_openai_message_truncation( - sentry_init, capture_items, nonstreaming_chat_completions_model_response + sentry_init, + capture_events, + capture_items, + nonstreaming_chat_completions_model_response, + stream_gen_ai_spans, ): """Test that large messages are truncated properly in OpenAI integration.""" sentry_init( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") client = OpenAI(api_key="z") client.chat.completions._post = mock.Mock( @@ -3898,23 +5432,48 @@ def test_openai_message_truncation( {"role": "user", "content": large_content}, ] - with start_transaction(name="openai tx"): - client.chat.completions.create( - model="some-model", - messages=large_messages, - ) + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with start_transaction(name="openai tx"): + client.chat.completions.create( + model="some-model", + messages=large_messages, + ) - span = next(item.payload for item in items if item.type == "span") - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] + span = next(item.payload for item in items if item.type == "span") + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] - messages_data = span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert isinstance(messages_data, str) + messages_data = span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - parsed_messages = json.loads(messages_data) - assert isinstance(parsed_messages, list) - assert len(parsed_messages) <= len(large_messages) + assert isinstance(messages_data, str) + + parsed_messages = json.loads(messages_data) + assert isinstance(parsed_messages, list) + assert len(parsed_messages) <= len(large_messages) + + (event,) = (item.payload for item in items if item.type == "transaction") + else: + events = capture_events() + + with start_transaction(name="openai tx"): + 
client.chat.completions.create( + model="some-model", + messages=large_messages, + ) + + (event,) = events + span = event["spans"][0] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] + + messages_data = span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + + assert isinstance(messages_data, str) + + parsed_messages = json.loads(messages_data) + assert isinstance(parsed_messages, list) + assert len(parsed_messages) <= len(large_messages) - (event,) = (item.payload for item in items if item.type == "transaction") meta_path = event["_meta"] span_meta = meta_path["spans"]["0"]["data"] messages_meta = span_meta[SPANDATA.GEN_AI_REQUEST_MESSAGES] @@ -3922,8 +5481,14 @@ def test_openai_message_truncation( # noinspection PyTypeChecker +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_streaming_chat_completion_ttft( - sentry_init, capture_items, get_model_response, server_side_event_chunks + sentry_init, + capture_events, + capture_items, + get_model_response, + server_side_event_chunks, + stream_gen_ai_spans, ): """ Test that streaming chat completions capture time-to-first-token (TTFT). 
@@ -3931,8 +5496,8 @@ def test_streaming_chat_completion_ttft( sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") client = OpenAI(api_key="z") returned_stream = get_model_response( @@ -3969,12 +5534,37 @@ def test_streaming_chat_completion_ttft( ), ) - with mock.patch.object( - client.chat._client._client, - "send", - return_value=returned_stream, - ): - with start_transaction(name="openai tx"): + if stream_gen_ai_spans: + items = capture_items("span") + + with mock.patch.object( + client.chat._client._client, + "send", + return_value=returned_stream, + ), start_transaction(name="openai tx"): + response_stream = client.chat.completions.create( + model="some-model", + messages=[{"role": "user", "content": "Say hello"}], + stream=True, + ) + # Consume the stream + for _ in response_stream: + pass + + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + + # Verify TTFT is captured + assert SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN in span["attributes"] + ttft = span["attributes"][SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN] + else: + events = capture_events() + + with mock.patch.object( + client.chat._client._client, + "send", + return_value=returned_stream, + ), start_transaction(name="openai tx"): response_stream = client.chat.completions.create( model="some-model", messages=[{"role": "user", "content": "Say hello"}], @@ -3984,24 +5574,29 @@ def test_streaming_chat_completion_ttft( for _ in response_stream: pass - span = next(item.payload for item in items if item.type == "span") - assert span["attributes"]["sentry.op"] == "gen_ai.chat" + (tx,) = events + span = tx["spans"][0] + assert span["op"] == "gen_ai.chat" + + # Verify TTFT is captured + assert SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN in span["data"] + ttft = 
span["data"][SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN] - # Verify TTFT is captured - assert SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN in span["attributes"] - ttft = span["attributes"][SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN] assert isinstance(ttft, float) assert ttft > 0 # noinspection PyTypeChecker +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_streaming_chat_completion_ttft_async( sentry_init, + capture_events, capture_items, get_model_response, async_iterator, server_side_event_chunks, + stream_gen_ai_spans, ): """ Test that async streaming chat completions capture time-to-first-token (TTFT). @@ -4009,8 +5604,8 @@ async def test_streaming_chat_completion_ttft_async( sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") client = AsyncOpenAI(api_key="z") returned_stream = get_model_response( @@ -4049,12 +5644,37 @@ async def test_streaming_chat_completion_ttft_async( ) ) - with mock.patch.object( - client.chat._client._client, - "send", - return_value=returned_stream, - ): - with start_transaction(name="openai tx"): + if stream_gen_ai_spans: + items = capture_items("span") + + with mock.patch.object( + client.chat._client._client, + "send", + return_value=returned_stream, + ), start_transaction(name="openai tx"): + response_stream = await client.chat.completions.create( + model="some-model", + messages=[{"role": "user", "content": "Say hello"}], + stream=True, + ) + # Consume the stream + async for _ in response_stream: + pass + + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + + # Verify TTFT is captured + assert SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN in span["attributes"] + ttft = span["attributes"][SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN] + else: + events = capture_events() + + with 
mock.patch.object( + client.chat._client._client, + "send", + return_value=returned_stream, + ), start_transaction(name="openai tx"): response_stream = await client.chat.completions.create( model="some-model", messages=[{"role": "user", "content": "Say hello"}], @@ -4064,20 +5684,28 @@ async def test_streaming_chat_completion_ttft_async( async for _ in response_stream: pass - span = next(item.payload for item in items if item.type == "span") - assert span["attributes"]["sentry.op"] == "gen_ai.chat" + (tx,) = events + span = tx["spans"][0] + assert span["op"] == "gen_ai.chat" + + # Verify TTFT is captured + assert SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN in span["data"] + ttft = span["data"][SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN] - # Verify TTFT is captured - assert SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN in span["attributes"] - ttft = span["attributes"][SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN] assert isinstance(ttft, float) assert ttft > 0 # noinspection PyTypeChecker +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") def test_streaming_responses_api_ttft( - sentry_init, capture_items, get_model_response, server_side_event_chunks + sentry_init, + capture_events, + capture_items, + get_model_response, + server_side_event_chunks, + stream_gen_ai_spans, ): """ Test that streaming responses API captures time-to-first-token (TTFT). 
@@ -4085,20 +5713,45 @@ def test_streaming_responses_api_ttft( sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") client = OpenAI(api_key="z") returned_stream = get_model_response( server_side_event_chunks(EXAMPLE_RESPONSES_STREAM) ) - with mock.patch.object( - client.responses._client._client, - "send", - return_value=returned_stream, - ): - with start_transaction(name="openai tx"): + if stream_gen_ai_spans: + items = capture_items("span") + + with mock.patch.object( + client.responses._client._client, + "send", + return_value=returned_stream, + ), start_transaction(name="openai tx"): + response_stream = client.responses.create( + model="some-model", + input="hello", + stream=True, + ) + # Consume the stream + for _ in response_stream: + pass + + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.responses" + + # Verify TTFT is captured + assert SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN in span["attributes"] + ttft = span["attributes"][SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN] + else: + events = capture_events() + + with mock.patch.object( + client.responses._client._client, + "send", + return_value=returned_stream, + ), start_transaction(name="openai tx"): response_stream = client.responses.create( model="some-model", input="hello", @@ -4108,25 +5761,30 @@ def test_streaming_responses_api_ttft( for _ in response_stream: pass - span = next(item.payload for item in items if item.type == "span") - assert span["attributes"]["sentry.op"] == "gen_ai.responses" + (tx,) = events + span = tx["spans"][0] + assert span["op"] == "gen_ai.responses" + + # Verify TTFT is captured + assert SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN in span["data"] + ttft = span["data"][SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN] - # Verify TTFT is captured - assert 
SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN in span["attributes"] - ttft = span["attributes"][SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN] assert isinstance(ttft, float) assert ttft > 0 # noinspection PyTypeChecker +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio @pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") async def test_streaming_responses_api_ttft_async( sentry_init, + capture_events, capture_items, get_model_response, async_iterator, server_side_event_chunks, + stream_gen_ai_spans, ): """ Test that async streaming responses API captures time-to-first-token (TTFT). @@ -4134,20 +5792,45 @@ async def test_streaming_responses_api_ttft_async( sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") client = AsyncOpenAI(api_key="z") returned_stream = get_model_response( async_iterator(server_side_event_chunks(EXAMPLE_RESPONSES_STREAM)) ) - with mock.patch.object( - client.responses._client._client, - "send", - return_value=returned_stream, - ): - with start_transaction(name="openai tx"): + if stream_gen_ai_spans: + items = capture_items("span") + + with mock.patch.object( + client.responses._client._client, + "send", + return_value=returned_stream, + ), start_transaction(name="openai tx"): + response_stream = await client.responses.create( + model="some-model", + input="hello", + stream=True, + ) + # Consume the stream + async for _ in response_stream: + pass + + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.responses" + + # Verify TTFT is captured + assert SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN in span["attributes"] + ttft = span["attributes"][SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN] + else: + events = capture_events() + + with mock.patch.object( + client.responses._client._client, + "send", + 
return_value=returned_stream, + ), start_transaction(name="openai tx"): response_stream = await client.responses.create( model="some-model", input="hello", @@ -4157,11 +5840,13 @@ async def test_streaming_responses_api_ttft_async( async for _ in response_stream: pass - span = next(item.payload for item in items if item.type == "span") - assert span["attributes"]["sentry.op"] == "gen_ai.responses" + (tx,) = events + span = tx["spans"][0] + assert span["op"] == "gen_ai.responses" + + # Verify TTFT is captured + assert SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN in span["data"] + ttft = span["data"][SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN] - # Verify TTFT is captured - assert SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN in span["attributes"] - ttft = span["attributes"][SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN] assert isinstance(ttft, float) assert ttft > 0 diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py index bde222274c..46196893d8 100644 --- a/tests/integrations/openai_agents/test_openai_agents.py +++ b/tests/integrations/openai_agents/test_openai_agents.py @@ -157,13 +157,16 @@ def test_agent_custom_model(): ) +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_agent_invocation_span_no_pii( sentry_init, + capture_events, capture_items, test_agent, nonstreaming_responses_model_response, get_model_response, + stream_gen_ai_spans, ): client = AsyncOpenAI(api_key="test-key") model = OpenAIResponsesModel(model="gpt-4", openai_client=client) @@ -173,64 +176,125 @@ async def test_agent_invocation_span_no_pii( nonstreaming_responses_model_response, serialize_pydantic=True ) - with patch.object( - agent.model._client._client, - "send", - return_value=response, - ) as _: - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - send_default_pii=False, + if stream_gen_ai_spans: + with patch.object( + 
agent.model._client._client, + "send", + return_value=response, + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=False, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) + + items = capture_items("span", "transaction") + + result = await agents.Runner.run( + agent, "Test input", run_config=test_run_config + ) + + assert result is not None + assert result.final_output == "Hello, how can I help you?" + + spans = [item.payload for item in items if item.type == "span"] + invoke_agent_span = next( + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT ) + ai_client_span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) + + (transaction,) = (item.payload for item in items if item.type == "transaction") + + assert transaction["transaction"] == "test_agent workflow" + assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" - items = capture_items("span", "transaction") + assert invoke_agent_span["name"] == "invoke_agent test_agent" + + assert ( + SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in invoke_agent_span["attributes"] + ) + assert "gen_ai.request.messages" not in invoke_agent_span["attributes"] + assert "gen_ai.response.text" not in invoke_agent_span["attributes"] - result = await agents.Runner.run( - agent, "Test input", run_config=test_run_config + assert ( + invoke_agent_span["attributes"]["gen_ai.operation.name"] == "invoke_agent" ) + assert invoke_agent_span["attributes"]["gen_ai.system"] == "openai" + assert invoke_agent_span["attributes"]["gen_ai.agent.name"] == "test_agent" + assert invoke_agent_span["attributes"]["gen_ai.request.max_tokens"] == 100 + assert invoke_agent_span["attributes"]["gen_ai.request.model"] == "gpt-4" + assert invoke_agent_span["attributes"]["gen_ai.request.temperature"] == 0.7 + assert invoke_agent_span["attributes"]["gen_ai.request.top_p"] == 1.0 + + assert 
ai_client_span["name"] == "chat gpt-4" + assert ai_client_span["attributes"]["gen_ai.operation.name"] == "chat" + assert ai_client_span["attributes"]["gen_ai.system"] == "openai" + assert ai_client_span["attributes"]["gen_ai.agent.name"] == "test_agent" + assert ai_client_span["attributes"]["gen_ai.request.max_tokens"] == 100 + assert ai_client_span["attributes"]["gen_ai.request.model"] == "gpt-4" + assert ai_client_span["attributes"]["gen_ai.request.temperature"] == 0.7 + assert ai_client_span["attributes"]["gen_ai.request.top_p"] == 1.0 + else: + with patch.object( + agent.model._client._client, + "send", + return_value=response, + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=False, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) + events = capture_events() + + result = await agents.Runner.run( + agent, "Test input", run_config=test_run_config + ) - assert result is not None - assert result.final_output == "Hello, how can I help you?" + assert result is not None + assert result.final_output == "Hello, how can I help you?" 
- spans = [item.payload for item in items if item.type == "span"] - invoke_agent_span = next( - span - for span in spans - if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT - ) - ai_client_span = next( - span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - ) + (transaction,) = events + spans = transaction["spans"] + invoke_agent_span = next( + span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT + ) + ai_client_span = next(span for span in spans if span["op"] == OP.GEN_AI_CHAT) - (transaction,) = (item.payload for item in items if item.type == "transaction") - assert transaction["transaction"] == "test_agent workflow" - assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" + assert transaction["transaction"] == "test_agent workflow" + assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" - assert invoke_agent_span["name"] == "invoke_agent test_agent" + assert invoke_agent_span["description"] == "invoke_agent test_agent" - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in invoke_agent_span["attributes"] - assert "gen_ai.request.messages" not in invoke_agent_span["attributes"] - assert "gen_ai.response.text" not in invoke_agent_span["attributes"] + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in invoke_agent_span["data"] + assert "gen_ai.request.messages" not in invoke_agent_span["data"] + assert "gen_ai.response.text" not in invoke_agent_span["data"] - assert invoke_agent_span["attributes"]["gen_ai.operation.name"] == "invoke_agent" - assert invoke_agent_span["attributes"]["gen_ai.system"] == "openai" - assert invoke_agent_span["attributes"]["gen_ai.agent.name"] == "test_agent" - assert invoke_agent_span["attributes"]["gen_ai.request.max_tokens"] == 100 - assert invoke_agent_span["attributes"]["gen_ai.request.model"] == "gpt-4" - assert invoke_agent_span["attributes"]["gen_ai.request.temperature"] == 0.7 - assert invoke_agent_span["attributes"]["gen_ai.request.top_p"] == 
1.0 + assert invoke_agent_span["data"]["gen_ai.operation.name"] == "invoke_agent" + assert invoke_agent_span["data"]["gen_ai.system"] == "openai" + assert invoke_agent_span["data"]["gen_ai.agent.name"] == "test_agent" + assert invoke_agent_span["data"]["gen_ai.request.max_tokens"] == 100 + assert invoke_agent_span["data"]["gen_ai.request.model"] == "gpt-4" + assert invoke_agent_span["data"]["gen_ai.request.temperature"] == 0.7 + assert invoke_agent_span["data"]["gen_ai.request.top_p"] == 1.0 - assert ai_client_span["name"] == "chat gpt-4" - assert ai_client_span["attributes"]["gen_ai.operation.name"] == "chat" - assert ai_client_span["attributes"]["gen_ai.system"] == "openai" - assert ai_client_span["attributes"]["gen_ai.agent.name"] == "test_agent" - assert ai_client_span["attributes"]["gen_ai.request.max_tokens"] == 100 - assert ai_client_span["attributes"]["gen_ai.request.model"] == "gpt-4" - assert ai_client_span["attributes"]["gen_ai.request.temperature"] == 0.7 - assert ai_client_span["attributes"]["gen_ai.request.top_p"] == 1.0 + assert ai_client_span["description"] == "chat gpt-4" + assert ai_client_span["data"]["gen_ai.operation.name"] == "chat" + assert ai_client_span["data"]["gen_ai.system"] == "openai" + assert ai_client_span["data"]["gen_ai.agent.name"] == "test_agent" + assert ai_client_span["data"]["gen_ai.request.max_tokens"] == 100 + assert ai_client_span["data"]["gen_ai.request.model"] == "gpt-4" + assert ai_client_span["data"]["gen_ai.request.temperature"] == 0.7 + assert ai_client_span["data"]["gen_ai.request.top_p"] == 1.0 +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio @pytest.mark.parametrize( "instructions", @@ -309,6 +373,7 @@ async def test_agent_invocation_span_no_pii( ) async def test_agent_invocation_span( sentry_init, + capture_events, capture_items, test_agent_with_instructions, nonstreaming_responses_model_response, @@ -316,6 +381,7 @@ async def test_agent_invocation_span( input, request, 
get_model_response, + stream_gen_ai_spans, ): """ Test that the integration creates spans for agent invocations. @@ -328,176 +394,360 @@ async def test_agent_invocation_span( nonstreaming_responses_model_response, serialize_pydantic=True ) - with patch.object( - agent.model._client._client, - "send", - return_value=response, - ) as _: - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - send_default_pii=True, - ) + if stream_gen_ai_spans: + with patch.object( + agent.model._client._client, + "send", + return_value=response, + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) - items = capture_items("span", "transaction") + items = capture_items("span", "transaction") - result = await agents.Runner.run( - agent, - input, - run_config=test_run_config, - ) + result = await agents.Runner.run( + agent, + input, + run_config=test_run_config, + ) - assert result is not None - assert result.final_output == "Hello, how can I help you?" + assert result is not None + assert result.final_output == "Hello, how can I help you?" 
- (transaction,) = (item.payload for item in items if item.type == "transaction") - assert transaction["transaction"] == "test_agent workflow" - assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" + (transaction,) = (item.payload for item in items if item.type == "transaction") - spans = [item.payload for item in items if item.type == "span"] - invoke_agent_span, ai_client_span = spans + assert transaction["transaction"] == "test_agent workflow" + assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" - assert invoke_agent_span["name"] == "invoke_agent test_agent" + spans = [item.payload for item in items if item.type == "span"] + invoke_agent_span, ai_client_span = spans + + assert invoke_agent_span["name"] == "invoke_agent test_agent" + else: + with patch.object( + agent.model._client._client, + "send", + return_value=response, + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) + events = capture_events() + + result = await agents.Runner.run( + agent, + input, + run_config=test_run_config, + ) + + assert result is not None + assert result.final_output == "Hello, how can I help you?" + + (transaction,) = events + spans = transaction["spans"] + invoke_agent_span, ai_client_span = spans + + assert transaction["transaction"] == "test_agent workflow" + assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" + + assert invoke_agent_span["description"] == "invoke_agent test_agent" # Only first case checks "gen_ai.request.messages" until further input handling work. 
param_id = request.node.callspec.id if "string" in param_id and instructions is None: # type: ignore - assert "gen_ai.system_instructions" not in ai_client_span["attributes"] + if stream_gen_ai_spans: + assert "gen_ai.system_instructions" not in ai_client_span["attributes"] - assert invoke_agent_span["attributes"][ - "gen_ai.request.messages" - ] == safe_serialize( - [ - {"content": [{"text": "Test input", "type": "text"}], "role": "user"}, - ] - ) + assert invoke_agent_span["attributes"][ + "gen_ai.request.messages" + ] == safe_serialize( + [ + { + "content": [{"text": "Test input", "type": "text"}], + "role": "user", + }, + ] + ) + else: + assert "gen_ai.system_instructions" not in ai_client_span["data"] + + assert invoke_agent_span["data"][ + "gen_ai.request.messages" + ] == safe_serialize( + [ + { + "content": [{"text": "Test input", "type": "text"}], + "role": "user", + }, + ] + ) elif "string" in param_id: - assert ai_client_span["attributes"][ - "gen_ai.system_instructions" - ] == safe_serialize( - [ - { - "type": "text", - "content": "You are a coding assistant that talks like a pirate.", - }, - ] - ) + if stream_gen_ai_spans: + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + ] + ) + else: + assert ai_client_span["data"][ + "gen_ai.system_instructions" + ] == safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + ] + ) elif "blocks_no_type" in param_id and instructions is None: # type: ignore - assert ai_client_span["attributes"][ - "gen_ai.system_instructions" - ] == safe_serialize( - [ - {"type": "text", "content": "You are a helpful assistant."}, - ] - ) + if stream_gen_ai_spans: + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( + [ + {"type": "text", "content": "You are a helpful assistant."}, + ] + ) + 
else: + assert ai_client_span["data"][ + "gen_ai.system_instructions" + ] == safe_serialize( + [ + {"type": "text", "content": "You are a helpful assistant."}, + ] + ) elif "blocks_no_type" in param_id: - assert ai_client_span["attributes"][ - "gen_ai.system_instructions" - ] == safe_serialize( - [ - { - "type": "text", - "content": "You are a coding assistant that talks like a pirate.", - }, - {"type": "text", "content": "You are a helpful assistant."}, - ] - ) + if stream_gen_ai_spans: + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + ] + ) + else: + assert ai_client_span["data"][ + "gen_ai.system_instructions" + ] == safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + ] + ) elif "blocks" in param_id and instructions is None: # type: ignore - assert ai_client_span["attributes"][ - "gen_ai.system_instructions" - ] == safe_serialize( - [ - {"type": "text", "content": "You are a helpful assistant."}, - ] - ) + if stream_gen_ai_spans: + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( + [ + {"type": "text", "content": "You are a helpful assistant."}, + ] + ) + else: + assert ai_client_span["data"][ + "gen_ai.system_instructions" + ] == safe_serialize( + [ + {"type": "text", "content": "You are a helpful assistant."}, + ] + ) elif "blocks" in param_id: - assert ai_client_span["attributes"][ - "gen_ai.system_instructions" - ] == safe_serialize( - [ - { - "type": "text", - "content": "You are a coding assistant that talks like a pirate.", - }, - {"type": "text", "content": "You are a helpful assistant."}, - ] - ) + if stream_gen_ai_spans: + assert ai_client_span["attributes"][ + 
"gen_ai.system_instructions" + ] == safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + ] + ) + else: + assert ai_client_span["data"][ + "gen_ai.system_instructions" + ] == safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + ] + ) elif "parts_no_type" in param_id and instructions is None: - assert ai_client_span["attributes"][ - "gen_ai.system_instructions" - ] == safe_serialize( - [ - {"type": "text", "content": "You are a helpful assistant."}, - {"type": "text", "content": "Be concise and clear."}, - ] - ) + if stream_gen_ai_spans: + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( + [ + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ) + else: + assert ai_client_span["data"][ + "gen_ai.system_instructions" + ] == safe_serialize( + [ + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ) elif "parts_no_type" in param_id: - assert ai_client_span["attributes"][ - "gen_ai.system_instructions" - ] == safe_serialize( - [ - { - "type": "text", - "content": "You are a coding assistant that talks like a pirate.", - }, - {"type": "text", "content": "You are a helpful assistant."}, - {"type": "text", "content": "Be concise and clear."}, - ] - ) + if stream_gen_ai_spans: + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ) + else: + assert ai_client_span["data"][ + 
"gen_ai.system_instructions" + ] == safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ) elif instructions is None: # type: ignore - assert ai_client_span["attributes"][ - "gen_ai.system_instructions" - ] == safe_serialize( - [ - {"type": "text", "content": "You are a helpful assistant."}, - {"type": "text", "content": "Be concise and clear."}, - ] + if stream_gen_ai_spans: + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( + [ + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ) + else: + assert ai_client_span["data"][ + "gen_ai.system_instructions" + ] == safe_serialize( + [ + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ) + else: + if stream_gen_ai_spans: + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ) + else: + assert ai_client_span["data"][ + "gen_ai.system_instructions" + ] == safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ) + + if stream_gen_ai_spans: + assert ( + invoke_agent_span["attributes"]["gen_ai.response.text"] + == "Hello, how can I help you?" 
) else: - assert ai_client_span["attributes"][ - "gen_ai.system_instructions" - ] == safe_serialize( - [ - { - "type": "text", - "content": "You are a coding assistant that talks like a pirate.", - }, - {"type": "text", "content": "You are a helpful assistant."}, - {"type": "text", "content": "Be concise and clear."}, - ] + assert ( + invoke_agent_span["data"]["gen_ai.response.text"] + == "Hello, how can I help you?" ) - assert ( - invoke_agent_span["attributes"]["gen_ai.response.text"] - == "Hello, how can I help you?" - ) - - assert invoke_agent_span["attributes"]["gen_ai.operation.name"] == "invoke_agent" - assert invoke_agent_span["attributes"]["gen_ai.system"] == "openai" - assert invoke_agent_span["attributes"]["gen_ai.agent.name"] == "test_agent" - assert invoke_agent_span["attributes"]["gen_ai.request.max_tokens"] == 100 - assert invoke_agent_span["attributes"]["gen_ai.request.model"] == "gpt-4" - assert invoke_agent_span["attributes"]["gen_ai.request.temperature"] == 0.7 - assert invoke_agent_span["attributes"]["gen_ai.request.top_p"] == 1.0 - - assert ai_client_span["name"] == "chat gpt-4" - assert ai_client_span["attributes"]["gen_ai.operation.name"] == "chat" - assert ai_client_span["attributes"]["gen_ai.system"] == "openai" - assert ai_client_span["attributes"]["gen_ai.agent.name"] == "test_agent" - assert ai_client_span["attributes"]["gen_ai.request.max_tokens"] == 100 - assert ai_client_span["attributes"]["gen_ai.request.model"] == "gpt-4" - assert ai_client_span["attributes"]["gen_ai.request.temperature"] == 0.7 - assert ai_client_span["attributes"]["gen_ai.request.top_p"] == 1.0 - - + if stream_gen_ai_spans: + assert ( + invoke_agent_span["attributes"]["gen_ai.operation.name"] == "invoke_agent" + ) + assert invoke_agent_span["attributes"]["gen_ai.system"] == "openai" + assert invoke_agent_span["attributes"]["gen_ai.agent.name"] == "test_agent" + assert invoke_agent_span["attributes"]["gen_ai.request.max_tokens"] == 100 + assert 
invoke_agent_span["attributes"]["gen_ai.request.model"] == "gpt-4" + assert invoke_agent_span["attributes"]["gen_ai.request.temperature"] == 0.7 + assert invoke_agent_span["attributes"]["gen_ai.request.top_p"] == 1.0 + + assert ai_client_span["name"] == "chat gpt-4" + assert ai_client_span["attributes"]["gen_ai.operation.name"] == "chat" + assert ai_client_span["attributes"]["gen_ai.system"] == "openai" + assert ai_client_span["attributes"]["gen_ai.agent.name"] == "test_agent" + assert ai_client_span["attributes"]["gen_ai.request.max_tokens"] == 100 + assert ai_client_span["attributes"]["gen_ai.request.model"] == "gpt-4" + assert ai_client_span["attributes"]["gen_ai.request.temperature"] == 0.7 + assert ai_client_span["attributes"]["gen_ai.request.top_p"] == 1.0 + else: + assert invoke_agent_span["data"]["gen_ai.operation.name"] == "invoke_agent" + assert invoke_agent_span["data"]["gen_ai.system"] == "openai" + assert invoke_agent_span["data"]["gen_ai.agent.name"] == "test_agent" + assert invoke_agent_span["data"]["gen_ai.request.max_tokens"] == 100 + assert invoke_agent_span["data"]["gen_ai.request.model"] == "gpt-4" + assert invoke_agent_span["data"]["gen_ai.request.temperature"] == 0.7 + assert invoke_agent_span["data"]["gen_ai.request.top_p"] == 1.0 + + assert ai_client_span["description"] == "chat gpt-4" + assert ai_client_span["data"]["gen_ai.operation.name"] == "chat" + assert ai_client_span["data"]["gen_ai.system"] == "openai" + assert ai_client_span["data"]["gen_ai.agent.name"] == "test_agent" + assert ai_client_span["data"]["gen_ai.request.max_tokens"] == 100 + assert ai_client_span["data"]["gen_ai.request.model"] == "gpt-4" + assert ai_client_span["data"]["gen_ai.request.temperature"] == 0.7 + assert ai_client_span["data"]["gen_ai.request.top_p"] == 1.0 + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_client_span_custom_model( sentry_init, + capture_events, capture_items, test_agent_custom_model, 
nonstreaming_responses_model_response, get_model_response, + stream_gen_ai_spans, ): """ Test that the integration uses the correct model name if a custom model is used. @@ -511,40 +761,78 @@ async def test_client_span_custom_model( nonstreaming_responses_model_response, serialize_pydantic=True ) - with patch.object( - agent.model._client._client, - "send", - return_value=response, - ) as _: - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - ) + if stream_gen_ai_spans: + with patch.object( + agent.model._client._client, + "send", + return_value=response, + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) - items = capture_items("span") + items = capture_items("span") - result = await agents.Runner.run( - agent, "Test input", run_config=test_run_config - ) + result = await agents.Runner.run( + agent, "Test input", run_config=test_run_config + ) - assert result is not None - assert result.final_output == "Hello, how can I help you?" + assert result is not None + assert result.final_output == "Hello, how can I help you?" 
- spans = [item.payload for item in items if item.type == "span"] - ai_client_span = next( - span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - ) + spans = [item.payload for item in items if item.type == "span"] + ai_client_span = next( + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) - assert ai_client_span["name"] == "chat my-custom-model" - assert ai_client_span["attributes"]["gen_ai.request.model"] == "my-custom-model" + assert ai_client_span["name"] == "chat my-custom-model" + assert ( + ai_client_span["attributes"]["gen_ai.request.model"] + == "my-custom-model" + ) + else: + with patch.object( + agent.model._client._client, + "send", + return_value=response, + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) + events = capture_events() + result = await agents.Runner.run( + agent, "Test input", run_config=test_run_config + ) + + assert result is not None + assert result.final_output == "Hello, how can I help you?" + + (transaction,) = events + spans = transaction["spans"] + ai_client_span = next( + span for span in spans if span["op"] == OP.GEN_AI_CHAT + ) + assert ai_client_span["description"] == "chat my-custom-model" + assert ai_client_span["data"]["gen_ai.request.model"] == "my-custom-model" + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_agent_invocation_span_sync_no_pii( sentry_init, + capture_events, capture_items, test_agent, nonstreaming_responses_model_response, get_model_response, + stream_gen_ai_spans, ): """ Test that the integration creates spans for agent invocations. 
@@ -557,59 +845,127 @@ def test_agent_invocation_span_sync_no_pii( nonstreaming_responses_model_response, serialize_pydantic=True ) - with patch.object( - agent.model._client._client, - "send", - return_value=response, - ) as _: - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - send_default_pii=False, - ) + if stream_gen_ai_spans: + with patch.object( + agent.model._client._client, + "send", + return_value=response, + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=False, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) - items = capture_items("span", "transaction") + items = capture_items("span", "transaction") - result = agents.Runner.run_sync(agent, "Test input", run_config=test_run_config) + result = agents.Runner.run_sync( + agent, "Test input", run_config=test_run_config + ) - assert result is not None - assert result.final_output == "Hello, how can I help you?" + assert result is not None + assert result.final_output == "Hello, how can I help you?" 
- (transaction,) = (item.payload for item in items if item.type == "transaction") - assert transaction["transaction"] == "test_agent workflow" - assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" + (transaction,) = ( + item.payload for item in items if item.type == "transaction" + ) - spans = [item.payload for item in items if item.type == "span"] - invoke_agent_span = next( - span - for span in spans - if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT - ) - ai_client_span = next( - span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - ) + assert transaction["transaction"] == "test_agent workflow" + assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" - assert invoke_agent_span["name"] == "invoke_agent test_agent" - assert invoke_agent_span["attributes"]["gen_ai.operation.name"] == "invoke_agent" - assert invoke_agent_span["attributes"]["gen_ai.system"] == "openai" - assert invoke_agent_span["attributes"]["gen_ai.agent.name"] == "test_agent" - assert invoke_agent_span["attributes"]["gen_ai.request.max_tokens"] == 100 - assert invoke_agent_span["attributes"]["gen_ai.request.model"] == "gpt-4" - assert invoke_agent_span["attributes"]["gen_ai.request.temperature"] == 0.7 - assert invoke_agent_span["attributes"]["gen_ai.request.top_p"] == 1.0 + spans = [item.payload for item in items if item.type == "span"] + invoke_agent_span = next( + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT + ) + ai_client_span = next( + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) + + assert invoke_agent_span["name"] == "invoke_agent test_agent" + assert ( + invoke_agent_span["attributes"]["gen_ai.operation.name"] + == "invoke_agent" + ) + assert invoke_agent_span["attributes"]["gen_ai.system"] == "openai" + assert invoke_agent_span["attributes"]["gen_ai.agent.name"] == "test_agent" + assert 
invoke_agent_span["attributes"]["gen_ai.request.max_tokens"] == 100 + assert invoke_agent_span["attributes"]["gen_ai.request.model"] == "gpt-4" + assert invoke_agent_span["attributes"]["gen_ai.request.temperature"] == 0.7 + assert invoke_agent_span["attributes"]["gen_ai.request.top_p"] == 1.0 + + assert ai_client_span["name"] == "chat gpt-4" + assert ai_client_span["attributes"]["gen_ai.operation.name"] == "chat" + assert ai_client_span["attributes"]["gen_ai.system"] == "openai" + assert ai_client_span["attributes"]["gen_ai.agent.name"] == "test_agent" + assert ai_client_span["attributes"]["gen_ai.request.max_tokens"] == 100 + assert ai_client_span["attributes"]["gen_ai.request.model"] == "gpt-4" + assert ai_client_span["attributes"]["gen_ai.request.temperature"] == 0.7 + assert ai_client_span["attributes"]["gen_ai.request.top_p"] == 1.0 + + assert ( + SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS + not in invoke_agent_span["attributes"] + ) + else: + with patch.object( + agent.model._client._client, + "send", + return_value=response, + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=False, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) + events = capture_events() + + result = agents.Runner.run_sync( + agent, "Test input", run_config=test_run_config + ) + + assert result is not None + assert result.final_output == "Hello, how can I help you?" 
+ + (transaction,) = events + spans = transaction["spans"] + invoke_agent_span = next( + span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT + ) + ai_client_span = next( + span for span in spans if span["op"] == OP.GEN_AI_CHAT + ) - assert ai_client_span["name"] == "chat gpt-4" - assert ai_client_span["attributes"]["gen_ai.operation.name"] == "chat" - assert ai_client_span["attributes"]["gen_ai.system"] == "openai" - assert ai_client_span["attributes"]["gen_ai.agent.name"] == "test_agent" - assert ai_client_span["attributes"]["gen_ai.request.max_tokens"] == 100 - assert ai_client_span["attributes"]["gen_ai.request.model"] == "gpt-4" - assert ai_client_span["attributes"]["gen_ai.request.temperature"] == 0.7 - assert ai_client_span["attributes"]["gen_ai.request.top_p"] == 1.0 + assert transaction["transaction"] == "test_agent workflow" + assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in invoke_agent_span["attributes"] + assert invoke_agent_span["description"] == "invoke_agent test_agent" + assert invoke_agent_span["data"]["gen_ai.operation.name"] == "invoke_agent" + assert invoke_agent_span["data"]["gen_ai.system"] == "openai" + assert invoke_agent_span["data"]["gen_ai.agent.name"] == "test_agent" + assert invoke_agent_span["data"]["gen_ai.request.max_tokens"] == 100 + assert invoke_agent_span["data"]["gen_ai.request.model"] == "gpt-4" + assert invoke_agent_span["data"]["gen_ai.request.temperature"] == 0.7 + assert invoke_agent_span["data"]["gen_ai.request.top_p"] == 1.0 + assert ai_client_span["description"] == "chat gpt-4" + assert ai_client_span["data"]["gen_ai.operation.name"] == "chat" + assert ai_client_span["data"]["gen_ai.system"] == "openai" + assert ai_client_span["data"]["gen_ai.agent.name"] == "test_agent" + assert ai_client_span["data"]["gen_ai.request.max_tokens"] == 100 + assert ai_client_span["data"]["gen_ai.request.model"] == "gpt-4" + assert 
ai_client_span["data"]["gen_ai.request.temperature"] == 0.7 + assert ai_client_span["data"]["gen_ai.request.top_p"] == 1.0 + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in invoke_agent_span["data"] + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( "instructions", ( @@ -659,202 +1015,362 @@ def test_agent_invocation_span_sync_no_pii( ], }, { - "role": "user", - "content": "Test input", - }, - ], - id="parts_no_type", - ), - pytest.param( - [ - { - "type": "message", - "role": "system", - "content": [ - {"type": "text", "text": "You are a helpful assistant."}, - {"type": "text", "text": "Be concise and clear."}, - ], - }, - { - "type": "message", - "role": "user", - "content": "Test input", - }, - ], - id="parts", - ), - ], -) -def test_agent_invocation_span_sync( - sentry_init, - capture_items, - test_agent_with_instructions, - nonstreaming_responses_model_response, - instructions, - input, - request, - get_model_response, -): - """ - Test that the integration creates spans for agent invocations. - """ - client = AsyncOpenAI(api_key="test-key") - model = OpenAIResponsesModel(model="gpt-4", openai_client=client) - agent = test_agent_with_instructions(instructions).clone(model=model) - - response = get_model_response( - nonstreaming_responses_model_response, serialize_pydantic=True - ) - - with patch.object( - agent.model._client._client, - "send", - return_value=response, - ) as _: - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - send_default_pii=True, - ) - - items = capture_items("span", "transaction") - - result = agents.Runner.run_sync( - agent, - input, - run_config=test_run_config, - ) - - assert result is not None - assert result.final_output == "Hello, how can I help you?" 
- - (transaction,) = (item.payload for item in items if item.type == "transaction") - assert transaction["transaction"] == "test_agent workflow" - assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" - - spans = [item.payload for item in items if item.type == "span"] - invoke_agent_span, ai_client_span = spans - - assert invoke_agent_span["name"] == "invoke_agent test_agent" - assert invoke_agent_span["attributes"]["gen_ai.operation.name"] == "invoke_agent" - assert invoke_agent_span["attributes"]["gen_ai.system"] == "openai" - assert invoke_agent_span["attributes"]["gen_ai.agent.name"] == "test_agent" - assert invoke_agent_span["attributes"]["gen_ai.request.max_tokens"] == 100 - assert invoke_agent_span["attributes"]["gen_ai.request.model"] == "gpt-4" - assert invoke_agent_span["attributes"]["gen_ai.request.temperature"] == 0.7 - assert invoke_agent_span["attributes"]["gen_ai.request.top_p"] == 1.0 - - assert ai_client_span["name"] == "chat gpt-4" - assert ai_client_span["attributes"]["gen_ai.operation.name"] == "chat" - assert ai_client_span["attributes"]["gen_ai.system"] == "openai" - assert ai_client_span["attributes"]["gen_ai.agent.name"] == "test_agent" - assert ai_client_span["attributes"]["gen_ai.request.max_tokens"] == 100 - assert ai_client_span["attributes"]["gen_ai.request.model"] == "gpt-4" - assert ai_client_span["attributes"]["gen_ai.request.temperature"] == 0.7 - assert ai_client_span["attributes"]["gen_ai.request.top_p"] == 1.0 - - param_id = request.node.callspec.id - if "string" in param_id and instructions is None: # type: ignore - assert "gen_ai.system_instructions" not in ai_client_span["attributes"] - elif "string" in param_id: - assert ai_client_span["attributes"][ - "gen_ai.system_instructions" - ] == safe_serialize( - [ - { - "type": "text", - "content": "You are a coding assistant that talks like a pirate.", - }, - ] - ) - elif "blocks_no_type" in param_id and instructions is None: # type: ignore - assert 
ai_client_span["attributes"][ - "gen_ai.system_instructions" - ] == safe_serialize( - [ - {"type": "text", "content": "You are a helpful assistant."}, - ] - ) - elif "blocks_no_type" in param_id: - assert ai_client_span["attributes"][ - "gen_ai.system_instructions" - ] == safe_serialize( - [ - { - "type": "text", - "content": "You are a coding assistant that talks like a pirate.", - }, - {"type": "text", "content": "You are a helpful assistant."}, - ] - ) - elif "blocks" in param_id and instructions is None: # type: ignore - assert ai_client_span["attributes"][ - "gen_ai.system_instructions" - ] == safe_serialize( - [ - {"type": "text", "content": "You are a helpful assistant."}, - ] - ) - elif "blocks" in param_id: - assert ai_client_span["attributes"][ - "gen_ai.system_instructions" - ] == safe_serialize( - [ - { - "type": "text", - "content": "You are a coding assistant that talks like a pirate.", + "role": "user", + "content": "Test input", }, - {"type": "text", "content": "You are a helpful assistant."}, - ] - ) - elif "parts_no_type" in param_id and instructions is None: - assert ai_client_span["attributes"][ - "gen_ai.system_instructions" - ] == safe_serialize( - [ - {"type": "text", "content": "You are a helpful assistant."}, - {"type": "text", "content": "Be concise and clear."}, - ] - ) - elif "parts_no_type" in param_id: - assert ai_client_span["attributes"][ - "gen_ai.system_instructions" - ] == safe_serialize( + ], + id="parts_no_type", + ), + pytest.param( [ { - "type": "text", - "content": "You are a coding assistant that talks like a pirate.", + "type": "message", + "role": "system", + "content": [ + {"type": "text", "text": "You are a helpful assistant."}, + {"type": "text", "text": "Be concise and clear."}, + ], }, - {"type": "text", "content": "You are a helpful assistant."}, - {"type": "text", "content": "Be concise and clear."}, - ] - ) - elif instructions is None: # type: ignore - assert ai_client_span["attributes"][ - 
"gen_ai.system_instructions" - ] == safe_serialize( - [ - {"type": "text", "content": "You are a helpful assistant."}, - {"type": "text", "content": "Be concise and clear."}, - ] - ) - else: - assert ai_client_span["attributes"][ - "gen_ai.system_instructions" - ] == safe_serialize( - [ { - "type": "text", - "content": "You are a coding assistant that talks like a pirate.", + "type": "message", + "role": "user", + "content": "Test input", }, - {"type": "text", "content": "You are a helpful assistant."}, - {"type": "text", "content": "Be concise and clear."}, - ] + ], + id="parts", + ), + ], +) +def test_agent_invocation_span_sync( + sentry_init, + capture_events, + capture_items, + test_agent_with_instructions, + nonstreaming_responses_model_response, + instructions, + input, + request, + get_model_response, + stream_gen_ai_spans, +): + """ + Test that the integration creates spans for agent invocations. + """ + client = AsyncOpenAI(api_key="test-key") + model = OpenAIResponsesModel(model="gpt-4", openai_client=client) + agent = test_agent_with_instructions(instructions).clone(model=model) + + response = get_model_response( + nonstreaming_responses_model_response, serialize_pydantic=True + ) + + if stream_gen_ai_spans: + with patch.object( + agent.model._client._client, + "send", + return_value=response, + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) + + items = capture_items("span", "transaction") + + result = agents.Runner.run_sync( + agent, + input, + run_config=test_run_config, + ) + + assert result is not None + assert result.final_output == "Hello, how can I help you?" 
+ + (transaction,) = (item.payload for item in items if item.type == "transaction") + assert transaction["transaction"] == "test_agent workflow" + assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" + + spans = [item.payload for item in items if item.type == "span"] + invoke_agent_span, ai_client_span = spans + + assert invoke_agent_span["name"] == "invoke_agent test_agent" + assert ( + invoke_agent_span["attributes"]["gen_ai.operation.name"] == "invoke_agent" ) + assert invoke_agent_span["attributes"]["gen_ai.system"] == "openai" + assert invoke_agent_span["attributes"]["gen_ai.agent.name"] == "test_agent" + assert invoke_agent_span["attributes"]["gen_ai.request.max_tokens"] == 100 + assert invoke_agent_span["attributes"]["gen_ai.request.model"] == "gpt-4" + assert invoke_agent_span["attributes"]["gen_ai.request.temperature"] == 0.7 + assert invoke_agent_span["attributes"]["gen_ai.request.top_p"] == 1.0 + + assert ai_client_span["name"] == "chat gpt-4" + assert ai_client_span["attributes"]["gen_ai.operation.name"] == "chat" + assert ai_client_span["attributes"]["gen_ai.system"] == "openai" + assert ai_client_span["attributes"]["gen_ai.agent.name"] == "test_agent" + assert ai_client_span["attributes"]["gen_ai.request.max_tokens"] == 100 + assert ai_client_span["attributes"]["gen_ai.request.model"] == "gpt-4" + assert ai_client_span["attributes"]["gen_ai.request.temperature"] == 0.7 + assert ai_client_span["attributes"]["gen_ai.request.top_p"] == 1.0 + + param_id = request.node.callspec.id + if "string" in param_id and instructions is None: # type: ignore + assert "gen_ai.system_instructions" not in ai_client_span["attributes"] + elif "string" in param_id: + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + ] + ) + elif "blocks_no_type" in param_id and instructions is None: # type: ignore + assert 
ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( + [ + {"type": "text", "content": "You are a helpful assistant."}, + ] + ) + elif "blocks_no_type" in param_id: + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + ] + ) + elif "blocks" in param_id and instructions is None: # type: ignore + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( + [ + {"type": "text", "content": "You are a helpful assistant."}, + ] + ) + elif "blocks" in param_id: + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + ] + ) + elif "parts_no_type" in param_id and instructions is None: + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( + [ + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ) + elif "parts_no_type" in param_id: + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ) + elif instructions is None: # type: ignore + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( + [ + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ) + else: + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( 
+ [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ) + else: + with patch.object( + agent.model._client._client, + "send", + return_value=response, + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) + + events = capture_events() + + result = agents.Runner.run_sync( + agent, + input, + run_config=test_run_config, + ) + + assert result is not None + assert result.final_output == "Hello, how can I help you?" + + (transaction,) = events + spans = transaction["spans"] + invoke_agent_span, ai_client_span = spans + + assert transaction["transaction"] == "test_agent workflow" + assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" + + assert invoke_agent_span["description"] == "invoke_agent test_agent" + assert invoke_agent_span["data"]["gen_ai.operation.name"] == "invoke_agent" + assert invoke_agent_span["data"]["gen_ai.system"] == "openai" + assert invoke_agent_span["data"]["gen_ai.agent.name"] == "test_agent" + assert invoke_agent_span["data"]["gen_ai.request.max_tokens"] == 100 + assert invoke_agent_span["data"]["gen_ai.request.model"] == "gpt-4" + assert invoke_agent_span["data"]["gen_ai.request.temperature"] == 0.7 + assert invoke_agent_span["data"]["gen_ai.request.top_p"] == 1.0 + + assert ai_client_span["description"] == "chat gpt-4" + assert ai_client_span["data"]["gen_ai.operation.name"] == "chat" + assert ai_client_span["data"]["gen_ai.system"] == "openai" + assert ai_client_span["data"]["gen_ai.agent.name"] == "test_agent" + assert ai_client_span["data"]["gen_ai.request.max_tokens"] == 100 + assert ai_client_span["data"]["gen_ai.request.model"] == "gpt-4" + assert ai_client_span["data"]["gen_ai.request.temperature"] == 0.7 + assert 
ai_client_span["data"]["gen_ai.request.top_p"] == 1.0 + + param_id = request.node.callspec.id + if "string" in param_id and instructions is None: # type: ignore + assert "gen_ai.system_instructions" not in ai_client_span["data"] + elif "string" in param_id: + assert ai_client_span["data"][ + "gen_ai.system_instructions" + ] == safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + ] + ) + elif "blocks_no_type" in param_id and instructions is None: # type: ignore + assert ai_client_span["data"][ + "gen_ai.system_instructions" + ] == safe_serialize( + [ + {"type": "text", "content": "You are a helpful assistant."}, + ] + ) + elif "blocks_no_type" in param_id: + assert ai_client_span["data"][ + "gen_ai.system_instructions" + ] == safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + ] + ) + elif "blocks" in param_id and instructions is None: # type: ignore + assert ai_client_span["data"][ + "gen_ai.system_instructions" + ] == safe_serialize( + [ + {"type": "text", "content": "You are a helpful assistant."}, + ] + ) + elif "blocks" in param_id: + assert ai_client_span["data"][ + "gen_ai.system_instructions" + ] == safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + ] + ) + elif "parts_no_type" in param_id and instructions is None: + assert ai_client_span["data"][ + "gen_ai.system_instructions" + ] == safe_serialize( + [ + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ) + elif "parts_no_type" in param_id: + assert ai_client_span["data"][ + "gen_ai.system_instructions" + ] == safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a 
pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ) + elif instructions is None: # type: ignore + assert ai_client_span["data"][ + "gen_ai.system_instructions" + ] == safe_serialize( + [ + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ) + else: + assert ai_client_span["data"][ + "gen_ai.system_instructions" + ] == safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ) +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio -async def test_handoff_span(sentry_init, capture_items, get_model_response): +async def test_handoff_span( + sentry_init, + capture_events, + capture_items, + get_model_response, + stream_gen_ai_spans, +): """ Test that handoff spans are created when agents hand off to other agents. 
""" @@ -947,42 +1463,85 @@ async def test_handoff_span(sentry_init, capture_items, get_model_response): serialize_pydantic=True, ) - with patch.object( - primary_agent.model._client._client, - "send", - side_effect=[handoff_response, final_response], - ) as _: - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - ) + if stream_gen_ai_spans: + with patch.object( + primary_agent.model._client._client, + "send", + side_effect=[handoff_response, final_response], + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) - items = capture_items("transaction", "span") + items = capture_items("transaction", "span") - result = await agents.Runner.run( - primary_agent, - "Please hand off to secondary agent", - run_config=test_run_config, - ) + result = await agents.Runner.run( + primary_agent, + "Please hand off to secondary agent", + run_config=test_run_config, + ) - assert result is not None + assert result is not None - spans = [item.payload for item in items if item.type == "span"] - handoff_span = next( - span - for span in spans - if span["attributes"].get("sentry.op") == OP.GEN_AI_HANDOFF - ) + spans = [item.payload for item in items if item.type == "span"] + handoff_span = next( + span + for span in spans + if span["attributes"].get("sentry.op") == OP.GEN_AI_HANDOFF + ) + + # Verify handoff span was created + assert handoff_span is not None + assert ( + handoff_span["name"] == "handoff from primary_agent to secondary_agent" + ) + assert handoff_span["attributes"]["gen_ai.operation.name"] == "handoff" + else: + with patch.object( + primary_agent.model._client._client, + "send", + side_effect=[handoff_response, final_response], + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) + events = capture_events() + + result = await 
agents.Runner.run( + primary_agent, + "Please hand off to secondary agent", + run_config=test_run_config, + ) + + assert result is not None + + (transaction,) = events + spans = transaction["spans"] + handoff_span = next( + span for span in spans if span.get("op") == OP.GEN_AI_HANDOFF + ) - # Verify handoff span was created - assert handoff_span is not None - assert handoff_span["name"] == "handoff from primary_agent to secondary_agent" - assert handoff_span["attributes"]["gen_ai.operation.name"] == "handoff" + # Verify handoff span was created + assert handoff_span is not None + assert ( + handoff_span["description"] + == "handoff from primary_agent to secondary_agent" + ) + assert handoff_span["data"]["gen_ai.operation.name"] == "handoff" +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_max_turns_before_handoff_span( - sentry_init, capture_items, get_model_response + sentry_init, + capture_events, + capture_items, + get_model_response, + stream_gen_ai_spans, ): """ Example raising agents.exceptions.AgentsException after the agent invocation span is complete. 
@@ -1076,46 +1635,87 @@ async def test_max_turns_before_handoff_span( serialize_pydantic=True, ) - with patch.object( - primary_agent.model._client._client, - "send", - side_effect=[handoff_response, final_response], - ) as _: - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - ) + if stream_gen_ai_spans: + with patch.object( + primary_agent.model._client._client, + "send", + side_effect=[handoff_response, final_response], + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) - items = capture_items("transaction", "span") + items = capture_items("transaction", "span") - with pytest.raises(MaxTurnsExceeded): - await agents.Runner.run( - primary_agent, - "Please hand off to secondary agent", - run_config=test_run_config, - max_turns=1, + with pytest.raises(MaxTurnsExceeded): + await agents.Runner.run( + primary_agent, + "Please hand off to secondary agent", + run_config=test_run_config, + max_turns=1, + ) + + spans = [item.payload for item in items if item.type == "span"] + handoff_span = next( + span + for span in spans + if span["attributes"].get("sentry.op") == OP.GEN_AI_HANDOFF ) - spans = [item.payload for item in items if item.type == "span"] - handoff_span = next( - span - for span in spans - if span["attributes"].get("sentry.op") == OP.GEN_AI_HANDOFF - ) + # Verify handoff span was created + assert handoff_span is not None + assert ( + handoff_span["name"] == "handoff from primary_agent to secondary_agent" + ) + assert handoff_span["attributes"]["gen_ai.operation.name"] == "handoff" + else: + with patch.object( + primary_agent.model._client._client, + "send", + side_effect=[handoff_response, final_response], + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) + events = capture_events() + + with 
pytest.raises(MaxTurnsExceeded): + await agents.Runner.run( + primary_agent, + "Please hand off to secondary agent", + run_config=test_run_config, + max_turns=1, + ) - # Verify handoff span was created - assert handoff_span is not None - assert handoff_span["name"] == "handoff from primary_agent to secondary_agent" - assert handoff_span["attributes"]["gen_ai.operation.name"] == "handoff" + (error, transaction) = events + spans = transaction["spans"] + handoff_span = next( + span for span in spans if span.get("op") == OP.GEN_AI_HANDOFF + ) + + # Verify handoff span was created + assert handoff_span is not None + assert ( + handoff_span["description"] + == "handoff from primary_agent to secondary_agent" + ) + assert handoff_span["data"]["gen_ai.operation.name"] == "handoff" +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_tool_execution_span( sentry_init, + capture_events, capture_items, test_agent, get_model_response, responses_tool_call_model_responses, + stream_gen_ai_spans, ): """ Test tool execution span creation. 
@@ -1182,9 +1782,13 @@ def simple_test_tool(message: str) -> str: integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + else: + events = capture_events() await agents.Runner.run( agent_with_tool, @@ -1192,24 +1796,35 @@ def simple_test_tool(message: str) -> str: run_config=test_run_config, ) - (transaction,) = (item.payload for item in items if item.type == "transaction") - assert transaction["transaction"] == "test_agent workflow" - assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" + if stream_gen_ai_spans: + (transaction,) = (item.payload for item in items if item.type == "transaction") + assert transaction["transaction"] == "test_agent workflow" + assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" - spans = [item.payload for item in items if item.type == "span"] - agent_span = next( - span - for span in spans - if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT - ) - ai_client_span1, ai_client_span2 = ( - span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - ) - tool_span = next( - span - for span in spans - if span["attributes"]["sentry.op"] == OP.GEN_AI_EXECUTE_TOOL - ) + spans = [item.payload for item in items if item.type == "span"] + agent_span = next( + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT + ) + ai_client_span1, ai_client_span2 = ( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) + tool_span = next( + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_EXECUTE_TOOL + ) + else: + (transaction,) = events + spans = transaction["spans"] + agent_span = next( + span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT + ) + ai_client_span1, 
ai_client_span2 = ( + span for span in spans if span["op"] == OP.GEN_AI_CHAT + ) + tool_span = next(span for span in spans if span["op"] == OP.GEN_AI_EXECUTE_TOOL) available_tool = { "name": "simple_test_tool", @@ -1249,53 +1864,107 @@ def simple_test_tool(message: str) -> str: } ) - assert agent_span["name"] == "invoke_agent test_agent" - assert agent_span["attributes"]["sentry.origin"] == "auto.ai.openai_agents" - assert agent_span["attributes"]["gen_ai.agent.name"] == "test_agent" - assert agent_span["attributes"]["gen_ai.operation.name"] == "invoke_agent" + if stream_gen_ai_spans: + assert agent_span["name"] == "invoke_agent test_agent" + assert agent_span["attributes"]["sentry.origin"] == "auto.ai.openai_agents" + assert agent_span["attributes"]["gen_ai.agent.name"] == "test_agent" + assert agent_span["attributes"]["gen_ai.operation.name"] == "invoke_agent" - agent_span_available_tool = json.loads( - agent_span["attributes"]["gen_ai.request.available_tools"] - )[0] - assert all(agent_span_available_tool[k] == v for k, v in available_tool.items()) + agent_span_available_tool = json.loads( + agent_span["attributes"]["gen_ai.request.available_tools"] + )[0] + else: + assert transaction["transaction"] == "test_agent workflow" + assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" - assert agent_span["attributes"]["gen_ai.request.max_tokens"] == 100 - assert agent_span["attributes"]["gen_ai.request.model"] == "gpt-4" - assert agent_span["attributes"]["gen_ai.request.temperature"] == 0.7 - assert agent_span["attributes"]["gen_ai.request.top_p"] == 1.0 - assert agent_span["attributes"]["gen_ai.system"] == "openai" + assert agent_span["description"] == "invoke_agent test_agent" + assert agent_span["origin"] == "auto.ai.openai_agents" + assert agent_span["data"]["gen_ai.agent.name"] == "test_agent" + assert agent_span["data"]["gen_ai.operation.name"] == "invoke_agent" - assert ai_client_span1["name"] == "chat gpt-4" - assert 
ai_client_span1["attributes"]["gen_ai.operation.name"] == "chat" - assert ai_client_span1["attributes"]["gen_ai.system"] == "openai" - assert ai_client_span1["attributes"]["gen_ai.agent.name"] == "test_agent" + agent_span_available_tool = json.loads( + agent_span["data"]["gen_ai.request.available_tools"] + )[0] + assert all(agent_span_available_tool[k] == v for k, v in available_tool.items()) - ai_client_span1_available_tool = json.loads( - ai_client_span1["attributes"]["gen_ai.request.available_tools"] - )[0] + if stream_gen_ai_spans: + assert agent_span["attributes"]["gen_ai.request.max_tokens"] == 100 + assert agent_span["attributes"]["gen_ai.request.model"] == "gpt-4" + assert agent_span["attributes"]["gen_ai.request.temperature"] == 0.7 + assert agent_span["attributes"]["gen_ai.request.top_p"] == 1.0 + assert agent_span["attributes"]["gen_ai.system"] == "openai" + + assert ai_client_span1["name"] == "chat gpt-4" + assert ai_client_span1["attributes"]["gen_ai.operation.name"] == "chat" + assert ai_client_span1["attributes"]["gen_ai.system"] == "openai" + assert ai_client_span1["attributes"]["gen_ai.agent.name"] == "test_agent" + + ai_client_span1_available_tool = json.loads( + ai_client_span1["attributes"]["gen_ai.request.available_tools"] + )[0] + else: + assert agent_span["data"]["gen_ai.request.max_tokens"] == 100 + assert agent_span["data"]["gen_ai.request.model"] == "gpt-4" + assert agent_span["data"]["gen_ai.request.temperature"] == 0.7 + assert agent_span["data"]["gen_ai.request.top_p"] == 1.0 + assert agent_span["data"]["gen_ai.system"] == "openai" + + assert ai_client_span1["description"] == "chat gpt-4" + assert ai_client_span1["data"]["gen_ai.operation.name"] == "chat" + assert ai_client_span1["data"]["gen_ai.system"] == "openai" + assert ai_client_span1["data"]["gen_ai.agent.name"] == "test_agent" + + ai_client_span1_available_tool = json.loads( + ai_client_span1["data"]["gen_ai.request.available_tools"] + )[0] assert all( 
ai_client_span1_available_tool[k] == v for k, v in available_tool.items() ) - assert ai_client_span1["attributes"]["gen_ai.request.max_tokens"] == 100 - assert ai_client_span1["attributes"]["gen_ai.request.messages"] == safe_serialize( - [ - { - "role": "user", - "content": [ - {"type": "text", "text": "Please use the simple test tool"} - ], - }, - ] - ) - assert ai_client_span1["attributes"]["gen_ai.request.model"] == "gpt-4" - assert ai_client_span1["attributes"]["gen_ai.request.temperature"] == 0.7 - assert ai_client_span1["attributes"]["gen_ai.request.top_p"] == 1.0 - assert ai_client_span1["attributes"]["gen_ai.usage.input_tokens"] == 10 - assert ai_client_span1["attributes"]["gen_ai.usage.input_tokens.cached"] == 0 - assert ai_client_span1["attributes"]["gen_ai.usage.output_tokens"] == 5 - assert ai_client_span1["attributes"]["gen_ai.usage.output_tokens.reasoning"] == 0 - assert ai_client_span1["attributes"]["gen_ai.usage.total_tokens"] == 15 + if stream_gen_ai_spans: + assert ai_client_span1["attributes"]["gen_ai.request.max_tokens"] == 100 + assert ai_client_span1["attributes"][ + "gen_ai.request.messages" + ] == safe_serialize( + [ + { + "role": "user", + "content": [ + {"type": "text", "text": "Please use the simple test tool"} + ], + }, + ] + ) + assert ai_client_span1["attributes"]["gen_ai.request.model"] == "gpt-4" + assert ai_client_span1["attributes"]["gen_ai.request.temperature"] == 0.7 + assert ai_client_span1["attributes"]["gen_ai.request.top_p"] == 1.0 + assert ai_client_span1["attributes"]["gen_ai.usage.input_tokens"] == 10 + assert ai_client_span1["attributes"]["gen_ai.usage.input_tokens.cached"] == 0 + assert ai_client_span1["attributes"]["gen_ai.usage.output_tokens"] == 5 + assert ( + ai_client_span1["attributes"]["gen_ai.usage.output_tokens.reasoning"] == 0 + ) + assert ai_client_span1["attributes"]["gen_ai.usage.total_tokens"] == 15 + else: + assert ai_client_span1["data"]["gen_ai.request.max_tokens"] == 100 + assert 
ai_client_span1["data"]["gen_ai.request.messages"] == safe_serialize( + [ + { + "role": "user", + "content": [ + {"type": "text", "text": "Please use the simple test tool"} + ], + }, + ] + ) + assert ai_client_span1["data"]["gen_ai.request.model"] == "gpt-4" + assert ai_client_span1["data"]["gen_ai.request.temperature"] == 0.7 + assert ai_client_span1["data"]["gen_ai.request.top_p"] == 1.0 + assert ai_client_span1["data"]["gen_ai.usage.input_tokens"] == 10 + assert ai_client_span1["data"]["gen_ai.usage.input_tokens.cached"] == 0 + assert ai_client_span1["data"]["gen_ai.usage.output_tokens"] == 5 + assert ai_client_span1["data"]["gen_ai.usage.output_tokens.reasoning"] == 0 + assert ai_client_span1["data"]["gen_ai.usage.total_tokens"] == 15 tool_call = { "arguments": '{"message": "hello"}', @@ -1309,67 +1978,135 @@ def simple_test_tool(message: str) -> str: if OPENAI_VERSION >= (2, 25, 0): tool_call["namespace"] = None - assert json.loads(ai_client_span1["attributes"]["gen_ai.response.tool_calls"]) == [ - tool_call - ] + if stream_gen_ai_spans: + assert json.loads( + ai_client_span1["attributes"]["gen_ai.response.tool_calls"] + ) == [tool_call] + else: + assert json.loads(ai_client_span1["data"]["gen_ai.response.tool_calls"]) == [ + tool_call + ] + + if stream_gen_ai_spans: + assert tool_span["name"] == "execute_tool simple_test_tool" + assert tool_span["attributes"]["gen_ai.agent.name"] == "test_agent" + assert tool_span["attributes"]["gen_ai.operation.name"] == "execute_tool" - assert tool_span["name"] == "execute_tool simple_test_tool" - assert tool_span["attributes"]["gen_ai.agent.name"] == "test_agent" - assert tool_span["attributes"]["gen_ai.operation.name"] == "execute_tool" + tool_span_available_tool = json.loads( + tool_span["attributes"]["gen_ai.request.available_tools"] + )[0] + else: + assert tool_span["description"] == "execute_tool simple_test_tool" + assert tool_span["data"]["gen_ai.agent.name"] == "test_agent" + assert 
tool_span["data"]["gen_ai.operation.name"] == "execute_tool" - tool_span_available_tool = json.loads( - tool_span["attributes"]["gen_ai.request.available_tools"] - )[0] + tool_span_available_tool = json.loads( + tool_span["data"]["gen_ai.request.available_tools"] + )[0] assert all(tool_span_available_tool[k] == v for k, v in available_tool.items()) - assert tool_span["attributes"]["gen_ai.request.max_tokens"] == 100 - assert tool_span["attributes"]["gen_ai.request.model"] == "gpt-4" - assert tool_span["attributes"]["gen_ai.request.temperature"] == 0.7 - assert tool_span["attributes"]["gen_ai.request.top_p"] == 1.0 - assert tool_span["attributes"]["gen_ai.system"] == "openai" - assert tool_span["attributes"]["gen_ai.tool.description"] == "A simple tool" - assert tool_span["attributes"]["gen_ai.tool.input"] == '{"message": "hello"}' - assert tool_span["attributes"]["gen_ai.tool.name"] == "simple_test_tool" - assert tool_span["attributes"]["gen_ai.tool.output"] == "Tool executed with: hello" - assert ai_client_span2["name"] == "chat gpt-4" - assert ai_client_span2["attributes"]["gen_ai.agent.name"] == "test_agent" - assert ai_client_span2["attributes"]["gen_ai.operation.name"] == "chat" - - ai_client_span2_available_tool = json.loads( - ai_client_span2["attributes"]["gen_ai.request.available_tools"] - )[0] + if stream_gen_ai_spans: + assert tool_span["attributes"]["gen_ai.request.max_tokens"] == 100 + assert tool_span["attributes"]["gen_ai.request.model"] == "gpt-4" + assert tool_span["attributes"]["gen_ai.request.temperature"] == 0.7 + assert tool_span["attributes"]["gen_ai.request.top_p"] == 1.0 + assert tool_span["attributes"]["gen_ai.system"] == "openai" + assert tool_span["attributes"]["gen_ai.tool.description"] == "A simple tool" + assert tool_span["attributes"]["gen_ai.tool.input"] == '{"message": "hello"}' + assert tool_span["attributes"]["gen_ai.tool.name"] == "simple_test_tool" + assert ( + tool_span["attributes"]["gen_ai.tool.output"] == "Tool executed 
with: hello" + ) + assert ai_client_span2["name"] == "chat gpt-4" + assert ai_client_span2["attributes"]["gen_ai.agent.name"] == "test_agent" + assert ai_client_span2["attributes"]["gen_ai.operation.name"] == "chat" + + ai_client_span2_available_tool = json.loads( + ai_client_span2["attributes"]["gen_ai.request.available_tools"] + )[0] + else: + assert tool_span["data"]["gen_ai.request.max_tokens"] == 100 + assert tool_span["data"]["gen_ai.request.model"] == "gpt-4" + assert tool_span["data"]["gen_ai.request.temperature"] == 0.7 + assert tool_span["data"]["gen_ai.request.top_p"] == 1.0 + assert tool_span["data"]["gen_ai.system"] == "openai" + assert tool_span["data"]["gen_ai.tool.description"] == "A simple tool" + assert tool_span["data"]["gen_ai.tool.input"] == '{"message": "hello"}' + assert tool_span["data"]["gen_ai.tool.name"] == "simple_test_tool" + assert tool_span["data"]["gen_ai.tool.output"] == "Tool executed with: hello" + assert ai_client_span2["description"] == "chat gpt-4" + assert ai_client_span2["data"]["gen_ai.agent.name"] == "test_agent" + assert ai_client_span2["data"]["gen_ai.operation.name"] == "chat" + + ai_client_span2_available_tool = json.loads( + ai_client_span2["data"]["gen_ai.request.available_tools"] + )[0] assert all( ai_client_span2_available_tool[k] == v for k, v in available_tool.items() ) - assert ai_client_span2["attributes"]["gen_ai.request.max_tokens"] == 100 - assert ai_client_span2["attributes"]["gen_ai.request.messages"] == safe_serialize( - [ - { - "role": "tool", - "content": [ - { - "call_id": "call_123", - "output": "Tool executed with: hello", - "type": "function_call_output", - } - ], - }, - ] - ) - assert ai_client_span2["attributes"]["gen_ai.request.model"] == "gpt-4" - assert ai_client_span2["attributes"]["gen_ai.request.temperature"] == 0.7 - assert ai_client_span2["attributes"]["gen_ai.request.top_p"] == 1.0 - assert ( - ai_client_span2["attributes"]["gen_ai.response.text"] - == "Task completed using the tool" - ) - 
assert ai_client_span2["attributes"]["gen_ai.system"] == "openai" - assert ai_client_span2["attributes"]["gen_ai.usage.input_tokens.cached"] == 0 - assert ai_client_span2["attributes"]["gen_ai.usage.input_tokens"] == 15 - assert ai_client_span2["attributes"]["gen_ai.usage.output_tokens.reasoning"] == 0 - assert ai_client_span2["attributes"]["gen_ai.usage.output_tokens"] == 10 - assert ai_client_span2["attributes"]["gen_ai.usage.total_tokens"] == 25 + if stream_gen_ai_spans: + assert ai_client_span2["attributes"]["gen_ai.request.max_tokens"] == 100 + assert ai_client_span2["attributes"][ + "gen_ai.request.messages" + ] == safe_serialize( + [ + { + "role": "tool", + "content": [ + { + "call_id": "call_123", + "output": "Tool executed with: hello", + "type": "function_call_output", + } + ], + }, + ] + ) + assert ai_client_span2["attributes"]["gen_ai.request.model"] == "gpt-4" + assert ai_client_span2["attributes"]["gen_ai.request.temperature"] == 0.7 + assert ai_client_span2["attributes"]["gen_ai.request.top_p"] == 1.0 + assert ( + ai_client_span2["attributes"]["gen_ai.response.text"] + == "Task completed using the tool" + ) + assert ai_client_span2["attributes"]["gen_ai.system"] == "openai" + assert ai_client_span2["attributes"]["gen_ai.usage.input_tokens.cached"] == 0 + assert ai_client_span2["attributes"]["gen_ai.usage.input_tokens"] == 15 + assert ( + ai_client_span2["attributes"]["gen_ai.usage.output_tokens.reasoning"] == 0 + ) + assert ai_client_span2["attributes"]["gen_ai.usage.output_tokens"] == 10 + assert ai_client_span2["attributes"]["gen_ai.usage.total_tokens"] == 25 + else: + assert ai_client_span2["data"]["gen_ai.request.max_tokens"] == 100 + assert ai_client_span2["data"]["gen_ai.request.messages"] == safe_serialize( + [ + { + "role": "tool", + "content": [ + { + "call_id": "call_123", + "output": "Tool executed with: hello", + "type": "function_call_output", + } + ], + }, + ] + ) + assert ai_client_span2["data"]["gen_ai.request.model"] == "gpt-4" + 
assert ai_client_span2["data"]["gen_ai.request.temperature"] == 0.7 + assert ai_client_span2["data"]["gen_ai.request.top_p"] == 1.0 + assert ( + ai_client_span2["data"]["gen_ai.response.text"] + == "Task completed using the tool" + ) + assert ai_client_span2["data"]["gen_ai.system"] == "openai" + assert ai_client_span2["data"]["gen_ai.usage.input_tokens.cached"] == 0 + assert ai_client_span2["data"]["gen_ai.usage.input_tokens"] == 15 + assert ai_client_span2["data"]["gen_ai.usage.output_tokens.reasoning"] == 0 + assert ai_client_span2["data"]["gen_ai.usage.output_tokens"] == 10 + assert ai_client_span2["data"]["gen_ai.usage.total_tokens"] == 25 @pytest.mark.asyncio @@ -1626,8 +2363,15 @@ async def test_hosted_mcp_tool_propagation_headers( assert hosted_mcp_tool["headers"]["baggage"] == expected_outgoing_baggage +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio -async def test_model_behavior_error(sentry_init, capture_items, test_agent): +async def test_model_behavior_error( + sentry_init, + capture_events, + capture_items, + test_agent, + stream_gen_ai_spans, +): """ Example raising agents.exceptions.AgentsException before the agent invocation span is complete. The mocked API response indicates that "wrong_tool" was called. 
@@ -1641,63 +2385,129 @@ def simple_test_tool(message: str) -> str: # Create agent with the tool agent_with_tool = test_agent.clone(tools=[simple_test_tool]) - with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}): - with patch( - "agents.models.openai_responses.OpenAIResponsesModel.get_response" - ) as mock_get_response: - # Create a mock response that includes tool calls - tool_call = ResponseFunctionToolCall( - id="call_123", - call_id="call_123", - name="wrong_tool", - type="function_call", - arguments='{"message": "hello"}', - ) - - tool_response = ModelResponse( - output=[tool_call], - usage=Usage( - requests=1, input_tokens=10, output_tokens=5, total_tokens=15 - ), - response_id="resp_tool_123", - ) + if stream_gen_ai_spans: + with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}): + with patch( + "agents.models.openai_responses.OpenAIResponsesModel.get_response" + ) as mock_get_response: + # Create a mock response that includes tool calls + tool_call = ResponseFunctionToolCall( + id="call_123", + call_id="call_123", + name="wrong_tool", + type="function_call", + arguments='{"message": "hello"}', + ) - mock_get_response.side_effect = [tool_response] + tool_response = ModelResponse( + output=[tool_call], + usage=Usage( + requests=1, input_tokens=10, output_tokens=5, total_tokens=15 + ), + response_id="resp_tool_123", + ) - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - send_default_pii=True, + mock_get_response.side_effect = [tool_response] + + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) + + items = capture_items("span", "transaction") + + with pytest.raises(ModelBehaviorError): + await agents.Runner.run( + agent_with_tool, + "Please use the simple test tool", + run_config=test_run_config, + ) + + (transaction,) = ( + item.payload for item in items if item.type == "transaction" ) + 
assert transaction["transaction"] == "test_agent workflow" + assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" - items = capture_items("span", "transaction") + spans = [item.payload for item in items if item.type == "span"] - with pytest.raises(ModelBehaviorError): - await agents.Runner.run( - agent_with_tool, - "Please use the simple test tool", - run_config=test_run_config, + ( + agent_span, + ai_client_span1, + ) = spans + assert agent_span["name"] == "invoke_agent test_agent" + assert agent_span["attributes"]["sentry.origin"] == "auto.ai.openai_agents" + + # Error due to unrecognized tool in model response. + assert agent_span["status"] == "error" + else: + with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}): + with patch( + "agents.models.openai_responses.OpenAIResponsesModel.get_response" + ) as mock_get_response: + # Create a mock response that includes tool calls + tool_call = ResponseFunctionToolCall( + id="call_123", + call_id="call_123", + name="wrong_tool", + type="function_call", + arguments='{"message": "hello"}', ) - (transaction,) = (item.payload for item in items if item.type == "transaction") - assert transaction["transaction"] == "test_agent workflow" - assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" + tool_response = ModelResponse( + output=[tool_call], + usage=Usage( + requests=1, input_tokens=10, output_tokens=5, total_tokens=15 + ), + response_id="resp_tool_123", + ) - spans = [item.payload for item in items if item.type == "span"] - ( - agent_span, - ai_client_span1, - ) = spans + mock_get_response.side_effect = [tool_response] + + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) + events = capture_events() + + with pytest.raises(ModelBehaviorError): + await agents.Runner.run( + agent_with_tool, + "Please use the simple test tool", + 
run_config=test_run_config, + ) + + (error, transaction) = events + spans = transaction["spans"] + ( + agent_span, + ai_client_span1, + ) = spans - assert agent_span["name"] == "invoke_agent test_agent" - assert agent_span["attributes"]["sentry.origin"] == "auto.ai.openai_agents" + assert transaction["transaction"] == "test_agent workflow" + assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" - # Error due to unrecognized tool in model response. - assert agent_span["status"] == "error" + assert agent_span["description"] == "invoke_agent test_agent" + assert agent_span["origin"] == "auto.ai.openai_agents" + # Error due to unrecognized tool in model response. + assert agent_span["status"] == "internal_error" + assert agent_span["tags"]["status"] == "internal_error" + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio -async def test_error_handling(sentry_init, capture_items, test_agent): +async def test_error_handling( + sentry_init, + capture_events, + capture_items, + test_agent, + stream_gen_ai_spans, +): """ Test error handling in agent execution. 
""" @@ -1714,37 +2524,100 @@ async def test_error_handling(sentry_init, capture_items, test_agent): LoggingIntegration(event_level=logging.CRITICAL), ], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("event", "span", "transaction") + if stream_gen_ai_spans: + items = capture_items("event", "span", "transaction") - with pytest.raises(Exception, match="Model Error"): - await agents.Runner.run( - test_agent, "Test input", run_config=test_run_config + with pytest.raises(Exception, match="Model Error"): + await agents.Runner.run( + test_agent, "Test input", run_config=test_run_config + ) + + (error_event,) = ( + item.payload for item in items if item.type == "event" + ) + + assert error_event["exception"]["values"][0]["type"] == "Exception" + assert error_event["exception"]["values"][0]["value"] == "Model Error" + assert ( + error_event["exception"]["values"][0]["mechanism"]["type"] + == "openai_agents" + ) + + (transaction,) = ( + item.payload for item in items if item.type == "transaction" + ) + + assert transaction["transaction"] == "test_agent workflow" + assert ( + transaction["contexts"]["trace"]["origin"] + == "auto.ai.openai_agents" + ) + + spans = [item.payload for item in items if item.type == "span"] + (invoke_agent_span, ai_client_span) = spans + + assert invoke_agent_span["name"] == "invoke_agent test_agent" + assert ( + invoke_agent_span["attributes"]["sentry.origin"] + == "auto.ai.openai_agents" ) - (error_event,) = (item.payload for item in items if item.type == "event") - assert error_event["exception"]["values"][0]["type"] == "Exception" - assert error_event["exception"]["values"][0]["value"] == "Model Error" - assert error_event["exception"]["values"][0]["mechanism"]["type"] == "openai_agents" + assert ai_client_span["name"] == "chat gpt-4" + assert ( + ai_client_span["attributes"]["sentry.origin"] + == "auto.ai.openai_agents" + ) + assert ai_client_span["status"] == "error" + else: + 
events = capture_events() + + with pytest.raises(Exception, match="Model Error"): + await agents.Runner.run( + test_agent, "Test input", run_config=test_run_config + ) + + ( + error_event, + transaction, + ) = events + + assert error_event["exception"]["values"][0]["type"] == "Exception" + assert error_event["exception"]["values"][0]["value"] == "Model Error" + assert ( + error_event["exception"]["values"][0]["mechanism"]["type"] + == "openai_agents" + ) - (transaction,) = (item.payload for item in items if item.type == "transaction") - assert transaction["transaction"] == "test_agent workflow" - assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" + spans = transaction["spans"] + (invoke_agent_span, ai_client_span) = spans - spans = [item.payload for item in items if item.type == "span"] - (invoke_agent_span, ai_client_span) = spans + assert transaction["transaction"] == "test_agent workflow" + assert ( + transaction["contexts"]["trace"]["origin"] + == "auto.ai.openai_agents" + ) - assert invoke_agent_span["name"] == "invoke_agent test_agent" - assert invoke_agent_span["attributes"]["sentry.origin"] == "auto.ai.openai_agents" + assert invoke_agent_span["description"] == "invoke_agent test_agent" + assert invoke_agent_span["origin"] == "auto.ai.openai_agents" - assert ai_client_span["name"] == "chat gpt-4" - assert ai_client_span["attributes"]["sentry.origin"] == "auto.ai.openai_agents" - assert ai_client_span["status"] == "error" + assert ai_client_span["description"] == "chat gpt-4" + assert ai_client_span["origin"] == "auto.ai.openai_agents" + assert ai_client_span["status"] == "internal_error" + assert ai_client_span["tags"]["status"] == "internal_error" +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio -async def test_error_captures_input_data(sentry_init, capture_items, test_agent): +async def test_error_captures_input_data( + sentry_init, + capture_events, + capture_items, + test_agent, + 
stream_gen_ai_spans, +): """ Test that input data is captured even when the API call raises an exception. This verifies that _set_input_data is called before the API call. @@ -1775,36 +2648,69 @@ async def test_error_captures_input_data(sentry_init, capture_items, test_agent) ], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("event", "span") + if stream_gen_ai_spans: + items = capture_items("event", "span") + else: + events = capture_events() with pytest.raises(InternalServerError, match="Error code: 500"): await agents.Runner.run(agent, "Test input", run_config=test_run_config) - (error_event,) = (item.payload for item in items if item.type == "event") + if stream_gen_ai_spans: + (error_event,) = (item.payload for item in items if item.type == "event") + else: + ( + error_event, + transaction, + ) = events + assert error_event["exception"]["values"][0]["type"] == "InternalServerError" assert error_event["exception"]["values"][0]["value"] == "Error code: 500" - spans = [item.payload for item in items if item.type == "span"] - ai_client_span = [ - s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" - ][0] + if stream_gen_ai_spans: + spans = [item.payload for item in items if item.type == "span"] + ai_client_span = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ][0] + + assert ai_client_span["name"] == "chat gpt-4" + assert ai_client_span["status"] == "error" + + assert "gen_ai.request.messages" in ai_client_span["attributes"] + else: + spans = transaction["spans"] + ai_client_span = [s for s in spans if s["op"] == "gen_ai.chat"][0] - assert ai_client_span["name"] == "chat gpt-4" - assert ai_client_span["status"] == "error" + assert ai_client_span["description"] == "chat gpt-4" + assert ai_client_span["status"] == "internal_error" + assert ai_client_span["tags"]["status"] == "internal_error" - assert "gen_ai.request.messages" 
in ai_client_span["attributes"] + assert "gen_ai.request.messages" in ai_client_span["data"] request_messages = safe_serialize( [ {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, ] ) - assert ai_client_span["attributes"]["gen_ai.request.messages"] == request_messages + if stream_gen_ai_spans: + assert ( + ai_client_span["attributes"]["gen_ai.request.messages"] == request_messages + ) + else: + assert ai_client_span["data"]["gen_ai.request.messages"] == request_messages +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio -async def test_span_status_error(sentry_init, capture_items, test_agent): +async def test_span_status_error( + sentry_init, + capture_events, + capture_items, + test_agent, + stream_gen_ai_spans, +): with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}): with patch( "agents.models.openai_responses.OpenAIResponsesModel.get_response" @@ -1817,28 +2723,51 @@ async def test_span_status_error(sentry_init, capture_items, test_agent): LoggingIntegration(event_level=logging.CRITICAL), ], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("event", "transaction", "span") + if stream_gen_ai_spans: + items = capture_items("event", "transaction", "span") - with pytest.raises(ValueError, match="Model Error"): - await agents.Runner.run( - test_agent, "Test input", run_config=test_run_config + with pytest.raises(ValueError, match="Model Error"): + await agents.Runner.run( + test_agent, "Test input", run_config=test_run_config + ) + + (error,) = (item.payload for item in items if item.type == "event") + assert error["level"] == "error" + + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["status"] == "error" + + (transaction,) = ( + item.payload for item in items if item.type == "transaction" ) + else: + events = capture_events() - (error,) = (item.payload for item in items if item.type == "event") - assert 
error["level"] == "error" + with pytest.raises(ValueError, match="Model Error"): + await agents.Runner.run( + test_agent, "Test input", run_config=test_run_config + ) - spans = [item.payload for item in items if item.type == "span"] - assert spans[0]["status"] == "error" + (error, transaction) = events + assert error["level"] == "error" + assert transaction["spans"][0]["status"] == "internal_error" + assert transaction["spans"][0]["tags"]["status"] == "internal_error" - (transaction,) = (item.payload for item in items if item.type == "transaction") assert transaction["contexts"]["trace"]["status"] == "internal_error" +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_mcp_tool_execution_spans( - sentry_init, capture_items, test_agent, get_model_response + sentry_init, + capture_events, + capture_items, + test_agent, + get_model_response, + stream_gen_ai_spans, ): """ Test that MCP (Model Context Protocol) tool calls create execute_tool spans. 
@@ -1930,45 +2859,88 @@ async def test_mcp_tool_execution_spans( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span", "transaction") + if stream_gen_ai_spans: + items = capture_items("span", "transaction") - await agents.Runner.run( - agent, - "Please use MCP tool", - run_config=test_run_config, - ) + await agents.Runner.run( + agent, + "Please use MCP tool", + run_config=test_run_config, + ) - spans = [item.payload for item in items if item.type == "span"] - - # Find the MCP execute_tool span - mcp_tool_span = None - for span in spans: - if span.get("name") == "execute_tool test_mcp_tool": - mcp_tool_span = span - break - - # Verify the MCP tool span was created - assert mcp_tool_span is not None, "MCP execute_tool span was not created" - assert mcp_tool_span["name"] == "execute_tool test_mcp_tool" - assert mcp_tool_span["attributes"]["gen_ai.tool.name"] == "test_mcp_tool" - assert ( - mcp_tool_span["attributes"]["gen_ai.tool.input"] == '{"query": "search term"}' - ) - assert ( - mcp_tool_span["attributes"]["gen_ai.tool.output"] - == "MCP tool executed successfully" - ) + spans = [item.payload for item in items if item.type == "span"] + + # Find the MCP execute_tool span + mcp_tool_span = None + for span in spans: + if span.get("name") == "execute_tool test_mcp_tool": + mcp_tool_span = span + break + + # Verify the MCP tool span was created + assert mcp_tool_span is not None, "MCP execute_tool span was not created" + assert mcp_tool_span["name"] == "execute_tool test_mcp_tool" + assert mcp_tool_span["attributes"]["gen_ai.tool.name"] == "test_mcp_tool" + assert ( + mcp_tool_span["attributes"]["gen_ai.tool.input"] + == '{"query": "search term"}' + ) + assert ( + mcp_tool_span["attributes"]["gen_ai.tool.output"] + == "MCP tool executed successfully" + ) + + # Verify no error status since error was None + assert mcp_tool_span.get("status") 
!= "error" + assert mcp_tool_span.get("tags", {}).get("status") != "error" + else: + events = capture_events() - # Verify no error status since error was None - assert mcp_tool_span.get("status") != "error" - assert mcp_tool_span.get("tags", {}).get("status") != "error" + await agents.Runner.run( + agent, + "Please use MCP tool", + run_config=test_run_config, + ) + + (transaction,) = events + spans = transaction["spans"] + + # Find the MCP execute_tool span + mcp_tool_span = None + for span in spans: + if span.get("description") == "execute_tool test_mcp_tool": + mcp_tool_span = span + break + + # Verify the MCP tool span was created + assert mcp_tool_span is not None, "MCP execute_tool span was not created" + assert mcp_tool_span["description"] == "execute_tool test_mcp_tool" + assert mcp_tool_span["data"]["gen_ai.tool.name"] == "test_mcp_tool" + assert ( + mcp_tool_span["data"]["gen_ai.tool.input"] == '{"query": "search term"}' + ) + assert ( + mcp_tool_span["data"]["gen_ai.tool.output"] + == "MCP tool executed successfully" + ) + # Verify no error status since error was None + assert mcp_tool_span.get("status") != "internal_error" + assert mcp_tool_span.get("tags", {}).get("status") != "internal_error" + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_mcp_tool_execution_with_error( - sentry_init, capture_items, test_agent, get_model_response + sentry_init, + capture_events, + capture_items, + test_agent, + get_model_response, + stream_gen_ai_spans, ): """ Test that MCP tool calls with errors are tracked with error status. 
@@ -2060,9 +3032,13 @@ async def test_mcp_tool_execution_with_error( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span", "transaction") + if stream_gen_ai_spans: + items = capture_items("span", "transaction") + else: + events = capture_events() await agents.Runner.run( agent, @@ -2070,29 +3046,57 @@ async def test_mcp_tool_execution_with_error( run_config=test_run_config, ) - spans = [item.payload for item in items if item.type == "span"] + if stream_gen_ai_spans: + spans = [item.payload for item in items if item.type == "span"] + + # Find the MCP execute_tool span with error + mcp_tool_span = None + for span in spans: + if span.get("name") == "execute_tool failing_mcp_tool": + mcp_tool_span = span + break + + # Verify the MCP tool span was created with error status + assert mcp_tool_span is not None, "MCP execute_tool span was not created" + assert mcp_tool_span["name"] == "execute_tool failing_mcp_tool" + assert mcp_tool_span["attributes"]["gen_ai.tool.name"] == "failing_mcp_tool" + assert mcp_tool_span["attributes"]["gen_ai.tool.input"] == '{"query": "test"}' + assert mcp_tool_span["attributes"]["gen_ai.tool.output"] == "None" + + # Verify error status was set + assert mcp_tool_span["status"] == "error" + else: + (transaction,) = events + spans = transaction["spans"] - # Find the MCP execute_tool span with error - mcp_tool_span = None - for span in spans: - if span.get("name") == "execute_tool failing_mcp_tool": - mcp_tool_span = span - break + # Find the MCP execute_tool span with error + mcp_tool_span = None + for span in spans: + if span.get("description") == "execute_tool failing_mcp_tool": + mcp_tool_span = span + break - # Verify the MCP tool span was created with error status - assert mcp_tool_span is not None, "MCP execute_tool span was not created" - assert mcp_tool_span["name"] == "execute_tool failing_mcp_tool" - assert 
mcp_tool_span["attributes"]["gen_ai.tool.name"] == "failing_mcp_tool" - assert mcp_tool_span["attributes"]["gen_ai.tool.input"] == '{"query": "test"}' - assert mcp_tool_span["attributes"]["gen_ai.tool.output"] == "None" + # Verify the MCP tool span was created with error status + assert mcp_tool_span is not None, "MCP execute_tool span was not created" + assert mcp_tool_span["description"] == "execute_tool failing_mcp_tool" + assert mcp_tool_span["data"]["gen_ai.tool.name"] == "failing_mcp_tool" + assert mcp_tool_span["data"]["gen_ai.tool.input"] == '{"query": "test"}' + assert mcp_tool_span["data"]["gen_ai.tool.output"] is None - # Verify error status was set - assert mcp_tool_span["status"] == "error" + # Verify error status was set + assert mcp_tool_span["status"] == "internal_error" + assert mcp_tool_span["tags"]["status"] == "internal_error" +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_mcp_tool_execution_without_pii( - sentry_init, capture_items, test_agent, get_model_response + sentry_init, + capture_events, + capture_items, + test_agent, + get_model_response, + stream_gen_ai_spans, ): """ Test that MCP tool input/output are not included when send_default_pii is False. 
@@ -2184,42 +3188,74 @@ async def test_mcp_tool_execution_without_pii( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=False, # PII disabled + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span", "transaction") + if stream_gen_ai_spans: + items = capture_items("span", "transaction") - await agents.Runner.run( - agent, - "Please use MCP tool", - run_config=test_run_config, - ) + await agents.Runner.run( + agent, + "Please use MCP tool", + run_config=test_run_config, + ) + + spans = [item.payload for item in items if item.type == "span"] + + # Find the MCP execute_tool span + mcp_tool_span = None + for span in spans: + if span.get("name") == "execute_tool test_mcp_tool": + mcp_tool_span = span + break + + # Verify the MCP tool span was created but without input/output + assert mcp_tool_span is not None, "MCP execute_tool span was not created" + assert mcp_tool_span["name"] == "execute_tool test_mcp_tool" + assert mcp_tool_span["attributes"]["gen_ai.tool.name"] == "test_mcp_tool" + + # Verify input and output are not included when send_default_pii is False + assert "gen_ai.tool.input" not in mcp_tool_span["attributes"] + assert "gen_ai.tool.output" not in mcp_tool_span["attributes"] + else: + events = capture_events() + + await agents.Runner.run( + agent, + "Please use MCP tool", + run_config=test_run_config, + ) - spans = [item.payload for item in items if item.type == "span"] + (transaction,) = events + spans = transaction["spans"] - # Find the MCP execute_tool span - mcp_tool_span = None - for span in spans: - if span.get("name") == "execute_tool test_mcp_tool": - mcp_tool_span = span - break + # Find the MCP execute_tool span + mcp_tool_span = None + for span in spans: + if span.get("description") == "execute_tool test_mcp_tool": + mcp_tool_span = span + break - # Verify the MCP tool span was created but without input/output - assert mcp_tool_span is not None, "MCP execute_tool span was not 
created" - assert mcp_tool_span["name"] == "execute_tool test_mcp_tool" - assert mcp_tool_span["attributes"]["gen_ai.tool.name"] == "test_mcp_tool" + # Verify the MCP tool span was created but without input/output + assert mcp_tool_span is not None, "MCP execute_tool span was not created" + assert mcp_tool_span["description"] == "execute_tool test_mcp_tool" + assert mcp_tool_span["data"]["gen_ai.tool.name"] == "test_mcp_tool" - # Verify input and output are not included when send_default_pii is False - assert "gen_ai.tool.input" not in mcp_tool_span["attributes"] - assert "gen_ai.tool.output" not in mcp_tool_span["attributes"] + # Verify input and output are not included when send_default_pii is False + assert "gen_ai.tool.input" not in mcp_tool_span["data"] + assert "gen_ai.tool.output" not in mcp_tool_span["data"] +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_multiple_agents_asyncio( sentry_init, + capture_events, capture_items, test_agent, nonstreaming_responses_model_response, get_model_response, + stream_gen_ai_spans, ): """ Test that multiple agents can be run at the same time in asyncio tasks @@ -2241,10 +3277,9 @@ async def test_multiple_agents_asyncio( sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span", "transaction") - async def run(): await agents.Runner.run( starting_agent=agent, @@ -2252,12 +3287,31 @@ async def run(): run_config=test_run_config, ) - await asyncio.gather(*[run() for _ in range(3)]) + if stream_gen_ai_spans: + items = capture_items("span", "transaction") + + await asyncio.gather(*[run() for _ in range(3)]) + + txn1, txn2, txn3 = ( + item.payload for item in items if item.type == "transaction" + ) + + assert txn1["transaction"] == "test_agent workflow" + assert txn2["transaction"] == "test_agent workflow" + else: + events = capture_events() + + await 
asyncio.gather(*[run() for _ in range(3)]) - txn1, txn2, txn3 = (item.payload for item in items if item.type == "transaction") + assert len(events) == 3 + txn1, txn2, txn3 = events + + assert txn1["type"] == "transaction" + assert txn1["transaction"] == "test_agent workflow" + assert txn2["type"] == "transaction" + assert txn2["transaction"] == "test_agent workflow" + assert txn3["type"] == "transaction" - assert txn1["transaction"] == "test_agent workflow" - assert txn2["transaction"] == "test_agent workflow" assert txn3["transaction"] == "test_agent workflow" @@ -2303,13 +3357,16 @@ def test_openai_agents_message_role_mapping( assert stored_messages[0]["role"] == expected_role +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_tool_execution_error_tracing( sentry_init, + capture_events, capture_items, test_agent, get_model_response, responses_tool_call_model_responses, + stream_gen_ai_spans, ): """ Test that tool execution errors are properly tracked via error tracing patch. 
@@ -2383,46 +3440,84 @@ def failing_tool(message: str) -> str: integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span", "transaction") + if stream_gen_ai_spans: + items = capture_items("span", "transaction") - # Note: The agents library catches tool exceptions internally, - # so we don't expect this to raise - await agents.Runner.run( - agent_with_tool, - "Please use the failing tool", - run_config=test_run_config, - ) + # Note: The agents library catches tool exceptions internally, + # so we don't expect this to raise + await agents.Runner.run( + agent_with_tool, + "Please use the failing tool", + run_config=test_run_config, + ) + + spans = [item.payload for item in items if item.type == "span"] + + # Find the execute_tool span + execute_tool_span = None + for span in spans: + description = span.get("name", "") + if description is not None and description.startswith( + "execute_tool failing_tool" + ): + execute_tool_span = span + break + + # Verify the execute_tool span was created + assert execute_tool_span is not None, "execute_tool span was not created" + assert execute_tool_span["name"] == "execute_tool failing_tool" + assert execute_tool_span["attributes"]["gen_ai.tool.name"] == "failing_tool" + + # Verify error status was set (this is the key test for our patch) + # The span should be marked as error because the tool execution failed + assert execute_tool_span["status"] == "error" + else: + events = capture_events() + + # Note: The agents library catches tool exceptions internally, + # so we don't expect this to raise + await agents.Runner.run( + agent_with_tool, + "Please use the failing tool", + run_config=test_run_config, + ) - spans = [item.payload for item in items if item.type == "span"] + (transaction,) = events + spans = transaction["spans"] - # Find the execute_tool span - execute_tool_span = None - for span in spans: - 
description = span.get("name", "") - if description is not None and description.startswith( - "execute_tool failing_tool" - ): - execute_tool_span = span - break + # Find the execute_tool span + execute_tool_span = None + for span in spans: + description = span.get("description", "") + if description is not None and description.startswith( + "execute_tool failing_tool" + ): + execute_tool_span = span + break - # Verify the execute_tool span was created - assert execute_tool_span is not None, "execute_tool span was not created" - assert execute_tool_span["name"] == "execute_tool failing_tool" - assert execute_tool_span["attributes"]["gen_ai.tool.name"] == "failing_tool" + # Verify the execute_tool span was created + assert execute_tool_span is not None, "execute_tool span was not created" + assert execute_tool_span["description"] == "execute_tool failing_tool" + assert execute_tool_span["data"]["gen_ai.tool.name"] == "failing_tool" - # Verify error status was set (this is the key test for our patch) - # The span should be marked as error because the tool execution failed - assert execute_tool_span["status"] == "error" + # Verify error status was set (this is the key test for our patch) + # The span should be marked as error because the tool execution failed + assert execute_tool_span["status"] == "internal_error" + assert execute_tool_span["tags"]["status"] == "internal_error" +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_invoke_agent_span_includes_usage_data( sentry_init, + capture_events, capture_items, test_agent, get_model_response, + stream_gen_ai_spans, ): """ Test that invoke_agent spans include aggregated usage data from context_wrapper. 
@@ -2480,42 +3575,80 @@ async def test_invoke_agent_span_includes_usage_data( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span", "transaction") + if stream_gen_ai_spans: + items = capture_items("span", "transaction") + + result = await agents.Runner.run( + agent, "Test input", run_config=test_run_config + ) - result = await agents.Runner.run( - agent, "Test input", run_config=test_run_config - ) + assert result is not None - assert result is not None + spans = [item.payload for item in items if item.type == "span"] + invoke_agent_span = next( + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT + ) - spans = [item.payload for item in items if item.type == "span"] - invoke_agent_span = next( - span - for span in spans - if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT - ) + # Verify invoke_agent span has usage data from context_wrapper + assert invoke_agent_span["name"] == "invoke_agent test_agent" + assert "gen_ai.usage.input_tokens" in invoke_agent_span["attributes"] + assert "gen_ai.usage.output_tokens" in invoke_agent_span["attributes"] + assert "gen_ai.usage.total_tokens" in invoke_agent_span["attributes"] + + assert invoke_agent_span["attributes"]["gen_ai.usage.input_tokens"] == 10 + assert invoke_agent_span["attributes"]["gen_ai.usage.output_tokens"] == 20 + assert invoke_agent_span["attributes"]["gen_ai.usage.total_tokens"] == 30 + assert ( + invoke_agent_span["attributes"]["gen_ai.usage.input_tokens.cached"] == 0 + ) + assert ( + invoke_agent_span["attributes"]["gen_ai.usage.output_tokens.reasoning"] + == 5 + ) + else: + events = capture_events() - # Verify invoke_agent span has usage data from context_wrapper - assert invoke_agent_span["name"] == "invoke_agent test_agent" - assert "gen_ai.usage.input_tokens" in invoke_agent_span["attributes"] - assert 
"gen_ai.usage.output_tokens" in invoke_agent_span["attributes"] - assert "gen_ai.usage.total_tokens" in invoke_agent_span["attributes"] + result = await agents.Runner.run( + agent, "Test input", run_config=test_run_config + ) + + assert result is not None + + (transaction,) = events + spans = transaction["spans"] + invoke_agent_span = next( + span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT + ) - assert invoke_agent_span["attributes"]["gen_ai.usage.input_tokens"] == 10 - assert invoke_agent_span["attributes"]["gen_ai.usage.output_tokens"] == 20 - assert invoke_agent_span["attributes"]["gen_ai.usage.total_tokens"] == 30 - assert invoke_agent_span["attributes"]["gen_ai.usage.input_tokens.cached"] == 0 - assert invoke_agent_span["attributes"]["gen_ai.usage.output_tokens.reasoning"] == 5 + # Verify invoke_agent span has usage data from context_wrapper + assert invoke_agent_span["description"] == "invoke_agent test_agent" + assert "gen_ai.usage.input_tokens" in invoke_agent_span["data"] + assert "gen_ai.usage.output_tokens" in invoke_agent_span["data"] + assert "gen_ai.usage.total_tokens" in invoke_agent_span["data"] + + assert invoke_agent_span["data"]["gen_ai.usage.input_tokens"] == 10 + assert invoke_agent_span["data"]["gen_ai.usage.output_tokens"] == 20 + assert invoke_agent_span["data"]["gen_ai.usage.total_tokens"] == 30 + assert invoke_agent_span["data"]["gen_ai.usage.input_tokens.cached"] == 0 + assert ( + invoke_agent_span["data"]["gen_ai.usage.output_tokens.reasoning"] == 5 + ) +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_ai_client_span_includes_response_model( sentry_init, + capture_events, capture_items, test_agent, get_model_response, + stream_gen_ai_spans, ): """ Test that ai_client spans (gen_ai.chat) include the response model from the actual API response. 
@@ -2573,32 +3706,63 @@ async def test_ai_client_span_includes_response_model( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span", "transaction") + if stream_gen_ai_spans: + items = capture_items("span", "transaction") - result = await agents.Runner.run( - agent, "Test input", run_config=test_run_config - ) + result = await agents.Runner.run( + agent, "Test input", run_config=test_run_config + ) - assert result is not None + assert result is not None - spans = [item.payload for item in items if item.type == "span"] - ai_client_span = next( - span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - ) + spans = [item.payload for item in items if item.type == "span"] + ai_client_span = next( + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) + + # Verify ai_client span has response model from API response + assert ai_client_span["name"] == "chat gpt-4" + assert "gen_ai.response.model" in ai_client_span["attributes"] + assert ( + ai_client_span["attributes"]["gen_ai.response.model"] + == "gpt-4.1-2025-04-14" + ) + else: + events = capture_events() + + result = await agents.Runner.run( + agent, "Test input", run_config=test_run_config + ) - # Verify ai_client span has response model from API response - assert ai_client_span["name"] == "chat gpt-4" - assert "gen_ai.response.model" in ai_client_span["attributes"] - assert ai_client_span["attributes"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" + assert result is not None + (transaction,) = events + spans = transaction["spans"] + ai_client_span = next( + span for span in spans if span["op"] == OP.GEN_AI_CHAT + ) + + # Verify ai_client span has response model from API response + assert ai_client_span["description"] == "chat gpt-4" + assert "gen_ai.response.model" in ai_client_span["data"] + assert ( + 
ai_client_span["data"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" + ) + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_ai_client_span_response_model_with_chat_completions( sentry_init, + capture_events, capture_items, get_model_response, + stream_gen_ai_spans, ): """ Test that response model is captured when using ChatCompletions API (not Responses API). @@ -2661,32 +3825,63 @@ async def test_ai_client_span_response_model_with_chat_completions( sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span", "transaction") + if stream_gen_ai_spans: + items = capture_items("span", "transaction") - result = await agents.Runner.run( - agent, "Test input", run_config=test_run_config - ) + result = await agents.Runner.run( + agent, "Test input", run_config=test_run_config + ) - assert result is not None + assert result is not None - spans = [item.payload for item in items if item.type == "span"] - ai_client_span = next( - span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - ) + spans = [item.payload for item in items if item.type == "span"] + ai_client_span = next( + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) - # Verify response model from API response is captured - assert "gen_ai.response.model" in ai_client_span["attributes"] - assert ( - ai_client_span["attributes"]["gen_ai.response.model"] - == "gpt-4o-mini-2024-07-18" - ) + # Verify response model from API response is captured + assert "gen_ai.response.model" in ai_client_span["attributes"] + assert ( + ai_client_span["attributes"]["gen_ai.response.model"] + == "gpt-4o-mini-2024-07-18" + ) + else: + events = capture_events() + + result = await agents.Runner.run( + agent, "Test input", run_config=test_run_config + ) + assert result is not None + (transaction,) = events + spans = 
transaction["spans"] + ai_client_span = next( + span for span in spans if span["op"] == OP.GEN_AI_CHAT + ) + + # Verify response model from API response is captured + assert "gen_ai.response.model" in ai_client_span["data"] + assert ( + ai_client_span["data"]["gen_ai.response.model"] + == "gpt-4o-mini-2024-07-18" + ) + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_multiple_llm_calls_aggregate_usage( - sentry_init, capture_items, test_agent, get_model_response + sentry_init, + capture_events, + capture_items, + test_agent, + get_model_response, + stream_gen_ai_spans, ): """ Test that invoke_agent spans show aggregated usage across multiple LLM calls @@ -2774,47 +3969,91 @@ def calculator(a: int, b: int) -> int: serialize_pydantic=True, ) - with patch.object( - agent_with_tool.model._client._client, - "send", - side_effect=[tool_call_response, final_response], - ) as _: - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - send_default_pii=True, - ) + if stream_gen_ai_spans: + with patch.object( + agent_with_tool.model._client._client, + "send", + side_effect=[tool_call_response, final_response], + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) + + items = capture_items("span", "transaction") + + result = await agents.Runner.run( + agent_with_tool, + "What is 5 + 3?", + run_config=test_run_config, + ) - items = capture_items("span", "transaction") + assert result is not None - result = await agents.Runner.run( - agent_with_tool, - "What is 5 + 3?", - run_config=test_run_config, + spans = [item.payload for item in items if item.type == "span"] + + invoke_agent_span = spans[0] + + # Verify invoke_agent span has aggregated usage from both API calls + # Total: 10 + 20 = 30 input tokens, 5 + 15 = 20 output tokens, 15 + 35 = 50 total + assert 
invoke_agent_span["attributes"]["gen_ai.usage.input_tokens"] == 30 + assert invoke_agent_span["attributes"]["gen_ai.usage.output_tokens"] == 20 + assert invoke_agent_span["attributes"]["gen_ai.usage.total_tokens"] == 50 + # Cached tokens should be aggregated: 0 + 5 = 5 + assert invoke_agent_span["attributes"]["gen_ai.usage.input_tokens.cached"] == 5 + # Reasoning tokens should be aggregated: 0 + 3 = 3 + assert ( + invoke_agent_span["attributes"]["gen_ai.usage.output_tokens.reasoning"] == 3 ) + else: + with patch.object( + agent_with_tool.model._client._client, + "send", + side_effect=[tool_call_response, final_response], + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) + events = capture_events() - assert result is not None + result = await agents.Runner.run( + agent_with_tool, + "What is 5 + 3?", + run_config=test_run_config, + ) + + assert result is not None - spans = [item.payload for item in items if item.type == "span"] - invoke_agent_span = spans[0] + (transaction,) = events + spans = transaction["spans"] - # Verify invoke_agent span has aggregated usage from both API calls - # Total: 10 + 20 = 30 input tokens, 5 + 15 = 20 output tokens, 15 + 35 = 50 total - assert invoke_agent_span["attributes"]["gen_ai.usage.input_tokens"] == 30 - assert invoke_agent_span["attributes"]["gen_ai.usage.output_tokens"] == 20 - assert invoke_agent_span["attributes"]["gen_ai.usage.total_tokens"] == 50 - # Cached tokens should be aggregated: 0 + 5 = 5 - assert invoke_agent_span["attributes"]["gen_ai.usage.input_tokens.cached"] == 5 - # Reasoning tokens should be aggregated: 0 + 3 = 3 - assert invoke_agent_span["attributes"]["gen_ai.usage.output_tokens.reasoning"] == 3 + invoke_agent_span = spans[0] + # Verify invoke_agent span has aggregated usage from both API calls + # Total: 10 + 20 = 30 input tokens, 5 + 15 = 20 output tokens, 15 + 35 = 50 
total + assert invoke_agent_span["data"]["gen_ai.usage.input_tokens"] == 30 + assert invoke_agent_span["data"]["gen_ai.usage.output_tokens"] == 20 + assert invoke_agent_span["data"]["gen_ai.usage.total_tokens"] == 50 + # Cached tokens should be aggregated: 0 + 5 = 5 + assert invoke_agent_span["data"]["gen_ai.usage.input_tokens.cached"] == 5 + # Reasoning tokens should be aggregated: 0 + 3 = 3 + assert invoke_agent_span["data"]["gen_ai.usage.output_tokens.reasoning"] == 3 + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_invoke_agent_span_includes_response_model( sentry_init, + capture_events, capture_items, test_agent, get_model_response, + stream_gen_ai_spans, ): """ Test that invoke_agent spans include the response model from the API response. @@ -2862,53 +4101,106 @@ async def test_invoke_agent_span_includes_response_model( serialize_pydantic=True, ) - with patch.object( - agent.model._client._client, - "send", - return_value=response, - ) as _: - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - send_default_pii=True, - ) + if stream_gen_ai_spans: + with patch.object( + agent.model._client._client, + "send", + return_value=response, + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) - items = capture_items("span", "transaction") + items = capture_items("span", "transaction") - result = await agents.Runner.run( - agent, "Test input", run_config=test_run_config - ) + result = await agents.Runner.run( + agent, "Test input", run_config=test_run_config + ) - assert result is not None + assert result is not None - spans = [item.payload for item in items if item.type == "span"] - invoke_agent_span = next( - span - for span in spans - if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT - ) - ai_client_span = next( - span for span in spans if 
span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - ) + spans = [item.payload for item in items if item.type == "span"] + invoke_agent_span = next( + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT + ) + ai_client_span = next( + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) - # Verify invoke_agent span has response model from API - assert invoke_agent_span["name"] == "invoke_agent test_agent" - assert "gen_ai.response.model" in invoke_agent_span["attributes"] - assert ( - invoke_agent_span["attributes"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" - ) + # Verify invoke_agent span has response model from API + assert invoke_agent_span["name"] == "invoke_agent test_agent" + assert "gen_ai.response.model" in invoke_agent_span["attributes"] + assert ( + invoke_agent_span["attributes"]["gen_ai.response.model"] + == "gpt-4.1-2025-04-14" + ) + + # Also verify ai_client span has it + assert "gen_ai.response.model" in ai_client_span["attributes"] + assert ( + ai_client_span["attributes"]["gen_ai.response.model"] + == "gpt-4.1-2025-04-14" + ) + else: + with patch.object( + agent.model._client._client, + "send", + return_value=response, + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) + events = capture_events() - # Also verify ai_client span has it - assert "gen_ai.response.model" in ai_client_span["attributes"] - assert ai_client_span["attributes"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" + result = await agents.Runner.run( + agent, "Test input", run_config=test_run_config + ) + + assert result is not None + + (transaction,) = events + spans = transaction["spans"] + invoke_agent_span = next( + span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT + ) + ai_client_span = next( + span for span in spans if span["op"] == OP.GEN_AI_CHAT + ) + + # 
Verify invoke_agent span has response model from API + assert invoke_agent_span["description"] == "invoke_agent test_agent" + assert "gen_ai.response.model" in invoke_agent_span["data"] + assert ( + invoke_agent_span["data"]["gen_ai.response.model"] + == "gpt-4.1-2025-04-14" + ) + + # Also verify ai_client span has it + assert "gen_ai.response.model" in ai_client_span["data"] + assert ( + ai_client_span["data"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" + ) +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_invoke_agent_span_uses_last_response_model( sentry_init, + capture_events, capture_items, test_agent, get_model_response, + stream_gen_ai_spans, ): """ Test that when an agent makes multiple LLM calls (e.g., with tools), @@ -2996,44 +4288,93 @@ def calculator(a: int, b: int) -> int: serialize_pydantic=True, ) - with patch.object( - agent_with_tool.model._client._client, - "send", - side_effect=[first_response, second_response], - ) as _: - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - send_default_pii=True, - ) + if stream_gen_ai_spans: + with patch.object( + agent_with_tool.model._client._client, + "send", + side_effect=[first_response, second_response], + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) - items = capture_items("span", "transaction") + items = capture_items("span", "transaction") - result = await agents.Runner.run( - agent_with_tool, - "What is 5 + 3?", - run_config=test_run_config, - ) + result = await agents.Runner.run( + agent_with_tool, + "What is 5 + 3?", + run_config=test_run_config, + ) - assert result is not None + assert result is not None - spans = [item.payload for item in items if item.type == "span"] - invoke_agent_span = spans[0] - first_ai_client_span = spans[1] - second_ai_client_span = spans[3] # After 
tool span + spans = [item.payload for item in items if item.type == "span"] - # Invoke_agent span uses the LAST response model - assert "gen_ai.response.model" in invoke_agent_span["attributes"] - assert ( - invoke_agent_span["attributes"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" - ) + invoke_agent_span = spans[0] + first_ai_client_span = spans[1] + second_ai_client_span = spans[3] # After tool span - # Each ai_client span has its own response model from the API - assert first_ai_client_span["attributes"]["gen_ai.response.model"] == "gpt-4-0613" - assert ( - second_ai_client_span["attributes"]["gen_ai.response.model"] - == "gpt-4.1-2025-04-14" - ) + # Invoke_agent span uses the LAST response model + assert "gen_ai.response.model" in invoke_agent_span["attributes"] + assert ( + invoke_agent_span["attributes"]["gen_ai.response.model"] + == "gpt-4.1-2025-04-14" + ) + + # Each ai_client span has its own response model from the API + assert ( + first_ai_client_span["attributes"]["gen_ai.response.model"] + == "gpt-4-0613" + ) + assert ( + second_ai_client_span["attributes"]["gen_ai.response.model"] + == "gpt-4.1-2025-04-14" + ) + else: + with patch.object( + agent_with_tool.model._client._client, + "send", + side_effect=[first_response, second_response], + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) + events = capture_events() + + result = await agents.Runner.run( + agent_with_tool, + "What is 5 + 3?", + run_config=test_run_config, + ) + + assert result is not None + + (transaction,) = events + spans = transaction["spans"] + + invoke_agent_span = spans[0] + first_ai_client_span = spans[1] + second_ai_client_span = spans[3] # After tool span + + # Invoke_agent span uses the LAST response model + assert "gen_ai.response.model" in invoke_agent_span["data"] + assert ( + invoke_agent_span["data"]["gen_ai.response.model"] + == 
"gpt-4.1-2025-04-14" + ) + + # Each ai_client span has its own response model from the API + assert first_ai_client_span["data"]["gen_ai.response.model"] == "gpt-4-0613" + assert ( + second_ai_client_span["data"]["gen_ai.response.model"] + == "gpt-4.1-2025-04-14" + ) def test_openai_agents_message_truncation(sentry_init, capture_items): @@ -3280,6 +4621,7 @@ async def test_streaming_ttft_on_chat_span( assert chat_span._data.get(SPANDATA.GEN_AI_RESPONSE_STREAMING) is True +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.skipif( parse_version(OPENAI_AGENTS_VERSION) < (0, 4, 0), reason="conversation_id support requires openai-agents >= 0.4.0", @@ -3287,10 +4629,12 @@ async def test_streaming_ttft_on_chat_span( @pytest.mark.asyncio async def test_conversation_id_on_all_spans( sentry_init, + capture_events, capture_items, test_agent, nonstreaming_responses_model_response, get_model_response, + stream_gen_ai_spans, ): """ Test that gen_ai.conversation.id is set on all AI-related spans when passed to Runner.run(). 
@@ -3304,58 +4648,121 @@ async def test_conversation_id_on_all_spans( nonstreaming_responses_model_response, serialize_pydantic=True ) - with patch.object( - agent.model._client._client, - "send", - return_value=response, - ) as _: - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - ) + if stream_gen_ai_spans: + with patch.object( + agent.model._client._client, + "send", + return_value=response, + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) - items = capture_items("span", "transaction") + items = capture_items("span", "transaction") - result = await agents.Runner.run( - agent, - "Test input", - run_config=test_run_config, - conversation_id="conv_test_123", - ) + result = await agents.Runner.run( + agent, + "Test input", + run_config=test_run_config, + conversation_id="conv_test_123", + ) - assert result is not None + assert result is not None - spans = [item.payload for item in items if item.type == "span"] - invoke_agent_span = next( - span - for span in spans - if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT - ) - ai_client_span = next( - span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - ) + spans = [item.payload for item in items if item.type == "span"] + invoke_agent_span = next( + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT + ) + ai_client_span = next( + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) - # Verify workflow span (transaction) has conversation_id - (transaction,) = (item.payload for item in items if item.type == "transaction") - assert ( - transaction["contexts"]["trace"]["data"]["gen_ai.conversation.id"] - == "conv_test_123" - ) + # Verify workflow span (transaction) has conversation_id + (transaction,) = ( + item.payload for item in items if item.type == "transaction" + ) + + 
assert ( + transaction["contexts"]["trace"]["data"]["gen_ai.conversation.id"] + == "conv_test_123" + ) + + # Verify invoke_agent span has conversation_id + assert ( + invoke_agent_span["attributes"]["gen_ai.conversation.id"] + == "conv_test_123" + ) + + # Verify ai_client span has conversation_id + assert ( + ai_client_span["attributes"]["gen_ai.conversation.id"] + == "conv_test_123" + ) + else: + with patch.object( + agent.model._client._client, + "send", + return_value=response, + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) + events = capture_events() + + result = await agents.Runner.run( + agent, + "Test input", + run_config=test_run_config, + conversation_id="conv_test_123", + ) - # Verify invoke_agent span has conversation_id - assert invoke_agent_span["attributes"]["gen_ai.conversation.id"] == "conv_test_123" + assert result is not None - # Verify ai_client span has conversation_id - assert ai_client_span["attributes"]["gen_ai.conversation.id"] == "conv_test_123" + (transaction,) = events + spans = transaction["spans"] + invoke_agent_span = next( + span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT + ) + ai_client_span = next( + span for span in spans if span["op"] == OP.GEN_AI_CHAT + ) + + # Verify workflow span (transaction) has conversation_id + assert ( + transaction["contexts"]["trace"]["data"]["gen_ai.conversation.id"] + == "conv_test_123" + ) + + # Verify invoke_agent span has conversation_id + assert ( + invoke_agent_span["data"]["gen_ai.conversation.id"] == "conv_test_123" + ) + + # Verify ai_client span has conversation_id + assert ai_client_span["data"]["gen_ai.conversation.id"] == "conv_test_123" +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.skipif( parse_version(OPENAI_AGENTS_VERSION) < (0, 4, 0), reason="conversation_id support requires openai-agents >= 0.4.0", ) @pytest.mark.asyncio async 
def test_conversation_id_on_tool_span( - sentry_init, capture_items, test_agent, get_model_response + sentry_init, + capture_events, + capture_items, + test_agent, + get_model_response, + stream_gen_ai_spans, ): """ Test that gen_ai.conversation.id is set on tool execution spans when passed to Runner.run(). @@ -3442,45 +4849,91 @@ def simple_tool(message: str) -> str: serialize_pydantic=True, ) - with patch.object( - agent_with_tool.model._client._client, - "send", - side_effect=[tool_response, final_response], - ) as _: - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - ) + if stream_gen_ai_spans: + with patch.object( + agent_with_tool.model._client._client, + "send", + side_effect=[tool_response, final_response], + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) - items = capture_items("span", "transaction") + items = capture_items("span", "transaction") - await agents.Runner.run( - agent_with_tool, - "Use the tool", - run_config=test_run_config, - conversation_id="conv_tool_test_456", + await agents.Runner.run( + agent_with_tool, + "Use the tool", + run_config=test_run_config, + conversation_id="conv_tool_test_456", + ) + + spans = [item.payload for item in items if item.type == "span"] + + # Find the tool span + tool_span = None + for span in spans: + if span.get("name", "").startswith("execute_tool"): + tool_span = span + break + + assert tool_span is not None + # Tool span should have the conversation_id passed to Runner.run() + assert tool_span["attributes"]["gen_ai.conversation.id"] == "conv_tool_test_456" + + # Workflow span (transaction) should have the same conversation_id + (transaction,) = (item.payload for item in items if item.type == "transaction") + + # Workflow span (transaction) should have the same conversation_id + assert ( + transaction["contexts"]["trace"]["data"]["gen_ai.conversation.id"] + == 
"conv_tool_test_456" ) + else: + with patch.object( + agent_with_tool.model._client._client, + "send", + side_effect=[tool_response, final_response], + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) - spans = [item.payload for item in items if item.type == "span"] - # Find the tool span - tool_span = None - for span in spans: - if span.get("name", "").startswith("execute_tool"): - tool_span = span - break - - assert tool_span is not None - # Tool span should have the conversation_id passed to Runner.run() - assert tool_span["attributes"]["gen_ai.conversation.id"] == "conv_tool_test_456" - - # Workflow span (transaction) should have the same conversation_id - (transaction,) = (item.payload for item in items if item.type == "transaction") - assert ( - transaction["contexts"]["trace"]["data"]["gen_ai.conversation.id"] - == "conv_tool_test_456" - ) + events = capture_events() + + await agents.Runner.run( + agent_with_tool, + "Use the tool", + run_config=test_run_config, + conversation_id="conv_tool_test_456", + ) + + (transaction,) = events + spans = transaction["spans"] + + # Find the tool span + tool_span = None + for span in spans: + if span.get("description", "").startswith("execute_tool"): + tool_span = span + break + + assert tool_span is not None + # Tool span should have the conversation_id passed to Runner.run() + assert tool_span["data"]["gen_ai.conversation.id"] == "conv_tool_test_456" + + # Workflow span (transaction) should have the same conversation_id + assert ( + transaction["contexts"]["trace"]["data"]["gen_ai.conversation.id"] + == "conv_tool_test_456" + ) +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.skipif( parse_version(OPENAI_AGENTS_VERSION) < (0, 4, 0), reason="conversation_id support requires openai-agents >= 0.4.0", @@ -3488,10 +4941,12 @@ def simple_tool(message: str) -> str: @pytest.mark.asyncio async 
def test_no_conversation_id_when_not_provided( sentry_init, + capture_events, capture_items, test_agent, nonstreaming_responses_model_response, get_model_response, + stream_gen_ai_spans, ): """ Test that gen_ai.conversation.id is not set when not passed to Runner.run(). @@ -3505,40 +4960,84 @@ async def test_no_conversation_id_when_not_provided( nonstreaming_responses_model_response, serialize_pydantic=True ) - with patch.object( - agent.model._client._client, - "send", - return_value=response, - ) as _: - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - ) + if stream_gen_ai_spans: + with patch.object( + agent.model._client._client, + "send", + return_value=response, + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) - items = capture_items("span", "transaction") + items = capture_items("span", "transaction") - # Don't pass conversation_id - result = await agents.Runner.run( - agent, "Test input", run_config=test_run_config - ) + # Don't pass conversation_id + result = await agents.Runner.run( + agent, "Test input", run_config=test_run_config + ) - assert result is not None + assert result is not None - (transaction,) = (item.payload for item in items if item.type == "transaction") + (transaction,) = ( + item.payload for item in items if item.type == "transaction" + ) - spans = [item.payload for item in items if item.type == "span"] - invoke_agent_span = next( - span - for span in spans - if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT - ) - ai_client_span = next( - span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - ) + spans = [item.payload for item in items if item.type == "span"] + invoke_agent_span = next( + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT + ) + ai_client_span = next( + span + for span in spans + if 
span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) - # Verify conversation_id is NOT set on any spans - assert "gen_ai.conversation.id" not in transaction["contexts"]["trace"].get( - "attributes", {} - ) - assert "gen_ai.conversation.id" not in invoke_agent_span.get("attributes", {}) - assert "gen_ai.conversation.id" not in ai_client_span.get("attributes", {}) + # Verify conversation_id is NOT set on any spans + assert "gen_ai.conversation.id" not in transaction["contexts"]["trace"].get( + "attributes", {} + ) + assert "gen_ai.conversation.id" not in invoke_agent_span.get( + "attributes", {} + ) + assert "gen_ai.conversation.id" not in ai_client_span.get("attributes", {}) + else: + with patch.object( + agent.model._client._client, + "send", + return_value=response, + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) + + events = capture_events() + + # Don't pass conversation_id + result = await agents.Runner.run( + agent, "Test input", run_config=test_run_config + ) + + assert result is not None + + (transaction,) = events + spans = transaction["spans"] + invoke_agent_span = next( + span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT + ) + ai_client_span = next( + span for span in spans if span["op"] == OP.GEN_AI_CHAT + ) + + # Verify conversation_id is NOT set on any spans + assert "gen_ai.conversation.id" not in transaction["contexts"]["trace"].get( + "data", {} + ) + assert "gen_ai.conversation.id" not in invoke_agent_span.get("data", {}) + assert "gen_ai.conversation.id" not in ai_client_span.get("data", {}) diff --git a/tests/integrations/pydantic_ai/test_pydantic_ai.py b/tests/integrations/pydantic_ai/test_pydantic_ai.py index cfb1ca09ca..d60058e4ce 100644 --- a/tests/integrations/pydantic_ai/test_pydantic_ai.py +++ b/tests/integrations/pydantic_ai/test_pydantic_ai.py @@ -52,8 +52,15 @@ def inner(): return inner 
+@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio -async def test_agent_run_async(sentry_init, capture_items, get_test_agent): +async def test_agent_run_async( + sentry_init, + capture_events, + capture_items, + get_test_agent, + stream_gen_ai_spans, +): """ Test that the integration creates spans for async agent runs. """ @@ -61,51 +68,91 @@ async def test_agent_run_async(sentry_init, capture_items, get_test_agent): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") - test_agent = get_test_agent() - result = await test_agent.run("Test input") - assert result is not None - assert result.output is not None + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - (transaction,) = (item.payload for item in items if item.type == "transaction") + result = await test_agent.run("Test input") - # Verify transaction (the transaction IS the invoke_agent span) - assert transaction["transaction"] == "invoke_agent test_agent" - assert transaction["contexts"]["trace"]["origin"] == "auto.ai.pydantic_ai" + assert result is not None + assert result.output is not None - # The transaction itself should have invoke_agent data - assert transaction["contexts"]["trace"]["op"] == "gen_ai.invoke_agent" + (transaction,) = (item.payload for item in items if item.type == "transaction") - spans = [item.payload for item in items if item.type == "span"] - # Find child span types (invoke_agent is the transaction, not a child span) - chat_spans = [ - s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" - ] - assert len(chat_spans) >= 1 + # Verify transaction (the transaction IS the invoke_agent span) + assert transaction["transaction"] == "invoke_agent test_agent" + assert transaction["contexts"]["trace"]["origin"] == "auto.ai.pydantic_ai" - # Check chat span - chat_span = 
chat_spans[0] - assert "chat" in chat_span["name"] - assert chat_span["attributes"]["gen_ai.operation.name"] == "chat" - assert chat_span["attributes"]["gen_ai.response.streaming"] is False - assert "gen_ai.request.messages" in chat_span["attributes"] - assert "gen_ai.usage.input_tokens" in chat_span["attributes"] - assert "gen_ai.usage.output_tokens" in chat_span["attributes"] + # The transaction itself should have invoke_agent data + assert transaction["contexts"]["trace"]["op"] == "gen_ai.invoke_agent" + + spans = [item.payload for item in items if item.type == "span"] + # Find child span types (invoke_agent is the transaction, not a child span) + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] + + assert len(chat_spans) >= 1 + + # Check chat span + chat_span = chat_spans[0] + assert "chat" in chat_span["name"] + assert chat_span["attributes"]["gen_ai.operation.name"] == "chat" + assert chat_span["attributes"]["gen_ai.response.streaming"] is False + assert "gen_ai.request.messages" in chat_span["attributes"] + assert "gen_ai.usage.input_tokens" in chat_span["attributes"] + assert "gen_ai.usage.output_tokens" in chat_span["attributes"] + else: + events = capture_events() + + result = await test_agent.run("Test input") + assert result is not None + assert result.output is not None + (transaction,) = events + spans = transaction["spans"] + + # Verify transaction (the transaction IS the invoke_agent span) + assert transaction["transaction"] == "invoke_agent test_agent" + assert transaction["contexts"]["trace"]["origin"] == "auto.ai.pydantic_ai" + + # The transaction itself should have invoke_agent data + assert transaction["contexts"]["trace"]["op"] == "gen_ai.invoke_agent" + + # Find child span types (invoke_agent is the transaction, not a child span) + chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + + assert len(chat_spans) >= 1 + + # Check chat span + chat_span = chat_spans[0] + assert "chat" in 
chat_span["description"] + assert chat_span["data"]["gen_ai.operation.name"] == "chat" + assert chat_span["data"]["gen_ai.response.streaming"] is False + assert "gen_ai.request.messages" in chat_span["data"] + assert "gen_ai.usage.input_tokens" in chat_span["data"] + assert "gen_ai.usage.output_tokens" in chat_span["data"] + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio -async def test_agent_run_async_model_error(sentry_init, capture_items): +async def test_agent_run_async_model_error( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("event", "transaction", "span") - def failing_model(messages, info): raise RuntimeError("model exploded") @@ -114,20 +161,43 @@ def failing_model(messages, info): name="test_agent", ) - with pytest.raises(RuntimeError, match="model exploded"): - await agent.run("Test input") + if stream_gen_ai_spans: + items = capture_items("event", "transaction", "span") + + with pytest.raises(RuntimeError, match="model exploded"): + await agent.run("Test input") - (error,) = (item.payload for item in items if item.type == "event") - assert error["level"] == "error" + (error,) = (item.payload for item in items if item.type == "event") + assert error["level"] == "error" - spans = [item.payload for item in items if item.type == "span"] - assert len(spans) == 1 + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 - assert spans[0]["status"] == "error" + assert spans[0]["status"] == "error" + else: + events = capture_events() + + with pytest.raises(RuntimeError, match="model exploded"): + await agent.run("Test input") + + (error, transaction) = events + assert error["level"] == "error" + spans = transaction["spans"] + assert len(spans) == 1 + assert spans[0]["status"] == "internal_error" + 
+ +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio -async def test_agent_run_async_usage_data(sentry_init, capture_items, get_test_agent): +async def test_agent_run_async_usage_data( + sentry_init, + capture_events, + capture_items, + get_test_agent, + stream_gen_ai_spans, +): """ Test that the invoke_agent span includes token usage and model data. """ @@ -135,17 +205,30 @@ async def test_agent_run_async_usage_data(sentry_init, capture_items, get_test_a integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") - test_agent = get_test_agent() - result = await test_agent.run("Test input") - assert result is not None - assert result.output is not None + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + result = await test_agent.run("Test input") + + assert result is not None + assert result.output is not None + + (transaction,) = (item.payload for item in items if item.type == "transaction") + else: + events = capture_events() + + result = await test_agent.run("Test input") + + assert result is not None + assert result.output is not None + + (transaction,) = events - (transaction,) = (item.payload for item in items if item.type == "transaction") # Verify transaction (the transaction IS the invoke_agent span) assert transaction["transaction"] == "invoke_agent test_agent" @@ -171,7 +254,14 @@ async def test_agent_run_async_usage_data(sentry_init, capture_items, get_test_a assert trace_data["gen_ai.response.model"] == "test" # Test model name -def test_agent_run_sync(sentry_init, capture_items, get_test_agent): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_agent_run_sync( + sentry_init, + capture_events, + capture_items, + get_test_agent, + stream_gen_ai_spans, +): """ Test that the integration creates spans for sync agent runs. 
""" @@ -179,42 +269,76 @@ def test_agent_run_sync(sentry_init, capture_items, get_test_agent): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") - test_agent = get_test_agent() - result = test_agent.run_sync("Test input") - assert result is not None - assert result.output is not None + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - spans = [item.payload for item in items if item.type == "span"] + result = test_agent.run_sync("Test input") - # Verify transaction - (transaction,) = (item.payload for item in items if item.type == "transaction") - assert transaction["transaction"] == "invoke_agent test_agent" - assert transaction["contexts"]["trace"]["origin"] == "auto.ai.pydantic_ai" + assert result is not None + assert result.output is not None - # Find span types - chat_spans = [ - s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" - ] - assert len(chat_spans) >= 1 + spans = [item.payload for item in items if item.type == "span"] - # Verify streaming flag is False for sync - for chat_span in chat_spans: - assert chat_span["attributes"]["gen_ai.response.streaming"] is False + # Verify transaction + (transaction,) = (item.payload for item in items if item.type == "transaction") + # Verify transaction + assert transaction["transaction"] == "invoke_agent test_agent" + assert transaction["contexts"]["trace"]["origin"] == "auto.ai.pydantic_ai" + + # Find span types + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] + + assert len(chat_spans) >= 1 + + # Verify streaming flag is False for sync + for chat_span in chat_spans: + assert chat_span["attributes"]["gen_ai.response.streaming"] is False + else: + events = capture_events() + + result = test_agent.run_sync("Test input") + + assert result is not None + assert result.output is not 
None + + (transaction,) = events + spans = transaction["spans"] -def test_agent_run_sync_model_error(sentry_init, capture_items): + # Verify transaction + assert transaction["transaction"] == "invoke_agent test_agent" + assert transaction["contexts"]["trace"]["origin"] == "auto.ai.pydantic_ai" + + # Find span types + chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + + assert len(chat_spans) >= 1 + + # Verify streaming flag is False for sync + for chat_span in chat_spans: + assert chat_span["data"]["gen_ai.response.streaming"] is False + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_agent_run_sync_model_error( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("event", "transaction", "span") - def failing_model(messages, info): raise RuntimeError("model exploded") @@ -223,20 +347,43 @@ def failing_model(messages, info): name="test_agent", ) - with pytest.raises(RuntimeError, match="model exploded"): - agent.run_sync("Test input") + if stream_gen_ai_spans: + items = capture_items("event", "transaction", "span") - (error,) = (item.payload for item in items if item.type == "event") - assert error["level"] == "error" + with pytest.raises(RuntimeError, match="model exploded"): + agent.run_sync("Test input") - spans = [item.payload for item in items if item.type == "span"] - assert len(spans) == 1 + (error,) = (item.payload for item in items if item.type == "event") + assert error["level"] == "error" + + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + + assert spans[0]["status"] == "error" + else: + events = capture_events() + + with pytest.raises(RuntimeError, match="model exploded"): + agent.run_sync("Test input") - assert spans[0]["status"] == "error" + (error, transaction) = events + assert 
error["level"] == "error" + + spans = transaction["spans"] + assert len(spans) == 1 + + assert spans[0]["status"] == "internal_error" +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio -async def test_agent_run_stream(sentry_init, capture_items, get_test_agent): +async def test_agent_run_stream( + sentry_init, + capture_events, + capture_items, + get_test_agent, + stream_gen_ai_spans, +): """ Test that the integration creates spans for streaming agent runs. """ @@ -244,43 +391,86 @@ async def test_agent_run_stream(sentry_init, capture_items, get_test_agent): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") - test_agent = get_test_agent() - async with test_agent.run_stream("Test input") as result: - # Consume the stream - async for _ in result.stream_output(): - pass - spans = [item.payload for item in items if item.type == "span"] + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - # Verify transaction - (transaction,) = (item.payload for item in items if item.type == "transaction") - assert transaction["transaction"] == "invoke_agent test_agent" - assert transaction["contexts"]["trace"]["origin"] == "auto.ai.pydantic_ai" + async with test_agent.run_stream("Test input") as result: + # Consume the stream + async for _ in result.stream_output(): + pass - # Find chat spans - chat_spans = [ - s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" - ] - assert len(chat_spans) >= 1 + spans = [item.payload for item in items if item.type == "span"] - # Verify streaming flag is True for streaming - for chat_span in chat_spans: - assert chat_span["attributes"]["gen_ai.response.streaming"] is True - assert "gen_ai.request.messages" in chat_span["attributes"] - assert "gen_ai.usage.input_tokens" in chat_span["attributes"] - # Streaming responses should 
still have output data - assert ( - "gen_ai.response.text" in chat_span["attributes"] - or "gen_ai.response.model" in chat_span["attributes"] - ) + # Verify transaction + (transaction,) = (item.payload for item in items if item.type == "transaction") + + # Verify transaction + assert transaction["transaction"] == "invoke_agent test_agent" + assert transaction["contexts"]["trace"]["origin"] == "auto.ai.pydantic_ai" + + # Find chat spans + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] + + assert len(chat_spans) >= 1 + + # Verify streaming flag is True for streaming + for chat_span in chat_spans: + assert chat_span["attributes"]["gen_ai.response.streaming"] is True + assert "gen_ai.request.messages" in chat_span["attributes"] + assert "gen_ai.usage.input_tokens" in chat_span["attributes"] + # Streaming responses should still have output data + assert ( + "gen_ai.response.text" in chat_span["attributes"] + or "gen_ai.response.model" in chat_span["attributes"] + ) + else: + events = capture_events() + + async with test_agent.run_stream("Test input") as result: + # Consume the stream + async for _ in result.stream_output(): + pass + (transaction,) = events + spans = transaction["spans"] + # Verify transaction + assert transaction["transaction"] == "invoke_agent test_agent" + assert transaction["contexts"]["trace"]["origin"] == "auto.ai.pydantic_ai" + + # Find chat spans + chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + + assert len(chat_spans) >= 1 + + # Verify streaming flag is True for streaming + for chat_span in chat_spans: + assert chat_span["data"]["gen_ai.response.streaming"] is True + assert "gen_ai.request.messages" in chat_span["data"] + assert "gen_ai.usage.input_tokens" in chat_span["data"] + # Streaming responses should still have output data + assert ( + "gen_ai.response.text" in chat_span["data"] + or "gen_ai.response.model" in chat_span["data"] + ) + + 
+@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio -async def test_agent_run_stream_events(sentry_init, capture_items, get_test_agent): +async def test_agent_run_stream_events( + sentry_init, + capture_events, + capture_items, + get_test_agent, + stream_gen_ai_spans, +): """ Test that run_stream_events creates spans (it uses run internally, so non-streaming). """ @@ -288,33 +478,66 @@ async def test_agent_run_stream_events(sentry_init, capture_items, get_test_agen integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") - # Consume all events test_agent = get_test_agent() - async for _ in test_agent.run_stream_events("Test input"): - pass - # Verify transaction - (transaction,) = (item.payload for item in items if item.type == "transaction") - assert transaction["transaction"] == "invoke_agent test_agent" + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - # Find chat spans - spans = [item.payload for item in items if item.type == "span"] - chat_spans = [ - s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" - ] - assert len(chat_spans) >= 1 + async for _ in test_agent.run_stream_events("Test input"): + pass - # run_stream_events uses run() internally, so streaming should be False - for chat_span in chat_spans: - assert chat_span["attributes"]["gen_ai.response.streaming"] is False + # Verify transaction + (transaction,) = (item.payload for item in items if item.type == "transaction") + + # Verify transaction + assert transaction["transaction"] == "invoke_agent test_agent" + # Find chat spans + spans = [item.payload for item in items if item.type == "span"] + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] + + assert len(chat_spans) >= 1 + # run_stream_events uses run() internally, so streaming should be 
False + for chat_span in chat_spans: + assert chat_span["attributes"]["gen_ai.response.streaming"] is False + else: + events = capture_events() + + async for _ in test_agent.run_stream_events("Test input"): + pass + + (transaction,) = events + + # Verify transaction + assert transaction["transaction"] == "invoke_agent test_agent" + + # Find chat spans + spans = transaction["spans"] + chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + + assert len(chat_spans) >= 1 + + # run_stream_events uses run() internally, so streaming should be False + for chat_span in chat_spans: + assert chat_span["data"]["gen_ai.response.streaming"] is False + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio -async def test_agent_with_tools(sentry_init, capture_items, get_test_agent): +async def test_agent_with_tools( + sentry_init, + capture_events, + capture_items, + get_test_agent, + stream_gen_ai_spans, +): """ Test that tool execution creates execute_tool spans. 
""" @@ -322,6 +545,7 @@ async def test_agent_with_tools(sentry_init, capture_items, get_test_agent): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) test_agent = get_test_agent() @@ -331,50 +555,90 @@ def add_numbers(a: int, b: int) -> int: """Add two numbers together.""" return a + b - items = capture_items("transaction", "span") + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - result = await test_agent.run("What is 5 + 3?") + result = await test_agent.run("What is 5 + 3?") - assert result is not None + assert result is not None - spans = [item.payload for item in items if item.type == "span"] + spans = [item.payload for item in items if item.type == "span"] - # Find child span types (invoke_agent is the transaction, not a child span) - chat_spans = [ - s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" - ] - tool_spans = [ - s - for s in spans - if s["attributes"].get("sentry.op", "") == "gen_ai.execute_tool" - ] + # Find child span types (invoke_agent is the transaction, not a child span) + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] + tool_spans = [ + s + for s in spans + if s["attributes"].get("sentry.op", "") == "gen_ai.execute_tool" + ] + + # Should have tool spans + assert len(tool_spans) >= 1 + + # Check tool span + tool_span = tool_spans[0] + assert "execute_tool" in tool_span["name"] + assert tool_span["attributes"]["gen_ai.operation.name"] == "execute_tool" + assert tool_span["attributes"]["gen_ai.tool.name"] == "add_numbers" + assert "gen_ai.tool.input" in tool_span["attributes"] + assert "gen_ai.tool.output" in tool_span["attributes"] + + # Check chat spans have available_tools + for chat_span in chat_spans: + assert "gen_ai.request.available_tools" in chat_span["attributes"] + available_tools_str = chat_span["attributes"][ + 
"gen_ai.request.available_tools" + ] + # Available tools is serialized as a string + assert "add_numbers" in available_tools_str + else: + events = capture_events() + + result = await test_agent.run("What is 5 + 3?") + + assert result is not None + + (transaction,) = events + spans = transaction["spans"] - # Should have tool spans - assert len(tool_spans) >= 1 + # Find child span types (invoke_agent is the transaction, not a child span) + chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + tool_spans = [s for s in spans if s["op"] == "gen_ai.execute_tool"] - # Check tool span - tool_span = tool_spans[0] - assert "execute_tool" in tool_span["name"] - assert tool_span["attributes"]["gen_ai.operation.name"] == "execute_tool" - assert tool_span["attributes"]["gen_ai.tool.name"] == "add_numbers" - assert "gen_ai.tool.input" in tool_span["attributes"] - assert "gen_ai.tool.output" in tool_span["attributes"] + # Should have tool spans + assert len(tool_spans) >= 1 - # Check chat spans have available_tools - for chat_span in chat_spans: - assert "gen_ai.request.available_tools" in chat_span["attributes"] - available_tools_str = chat_span["attributes"]["gen_ai.request.available_tools"] - # Available tools is serialized as a string - assert "add_numbers" in available_tools_str + # Check tool span + tool_span = tool_spans[0] + assert "execute_tool" in tool_span["description"] + assert tool_span["data"]["gen_ai.operation.name"] == "execute_tool" + assert tool_span["data"]["gen_ai.tool.name"] == "add_numbers" + assert "gen_ai.tool.input" in tool_span["data"] + assert "gen_ai.tool.output" in tool_span["data"] + # Check chat spans have available_tools + for chat_span in chat_spans: + assert "gen_ai.request.available_tools" in chat_span["data"] + available_tools_str = chat_span["data"]["gen_ai.request.available_tools"] + # Available tools is serialized as a string + assert "add_numbers" in available_tools_str + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, 
False]) @pytest.mark.parametrize( "handled_tool_call_exceptions", [False, True], ) @pytest.mark.asyncio async def test_agent_with_tool_model_retry( - sentry_init, capture_items, get_test_agent, handled_tool_call_exceptions + sentry_init, + capture_events, + capture_items, + get_test_agent, + handled_tool_call_exceptions, + stream_gen_ai_spans, ): """ Test that a handled exception is captured when a tool raises ModelRetry. @@ -387,6 +651,7 @@ async def test_agent_with_tool_model_retry( ], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) retries = 0 @@ -402,62 +667,117 @@ def add_numbers(a: int, b: int) -> float: raise ModelRetry(message="Try again with the same arguments.") return a + b - items = capture_items("event", "transaction", "span") + if stream_gen_ai_spans: + items = capture_items("event", "transaction", "span") - result = await test_agent.run("What is 5 + 3?") + result = await test_agent.run("What is 5 + 3?") - assert result is not None + assert result is not None - if handled_tool_call_exceptions: - (error,) = (item.payload for item in items if item.type == "event") - assert error["level"] == "error" - assert error["exception"]["values"][0]["mechanism"]["handled"] + if handled_tool_call_exceptions: + (error,) = (item.payload for item in items if item.type == "event") + assert error["level"] == "error" + assert error["exception"]["values"][0]["mechanism"]["handled"] - spans = [item.payload for item in items if item.type == "span"] - # Find child span types (invoke_agent is the transaction, not a child span) - chat_spans = [ - s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" - ] - tool_spans = [ - s - for s in spans - if s["attributes"].get("sentry.op", "") == "gen_ai.execute_tool" - ] + spans = [item.payload for item in items if item.type == "span"] + # Find child span types (invoke_agent is the transaction, not a child span) + chat_spans = [ + s for s in spans if 
s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] + tool_spans = [ + s + for s in spans + if s["attributes"].get("sentry.op", "") == "gen_ai.execute_tool" + ] - # Should have tool spans - assert len(tool_spans) >= 1 + # Should have tool spans + assert len(tool_spans) >= 1 - # Check tool spans - model_retry_tool_span = tool_spans[0] - assert "execute_tool" in model_retry_tool_span["name"] - assert ( - model_retry_tool_span["attributes"]["gen_ai.operation.name"] == "execute_tool" - ) - assert model_retry_tool_span["attributes"]["gen_ai.tool.name"] == "add_numbers" - assert "gen_ai.tool.input" in model_retry_tool_span["attributes"] + # Check tool spans + model_retry_tool_span = tool_spans[0] + assert "execute_tool" in model_retry_tool_span["name"] + assert ( + model_retry_tool_span["attributes"]["gen_ai.operation.name"] + == "execute_tool" + ) + assert model_retry_tool_span["attributes"]["gen_ai.tool.name"] == "add_numbers" + assert "gen_ai.tool.input" in model_retry_tool_span["attributes"] + + tool_span = tool_spans[1] + assert "execute_tool" in tool_span["name"] + assert tool_span["attributes"]["gen_ai.operation.name"] == "execute_tool" + assert tool_span["attributes"]["gen_ai.tool.name"] == "add_numbers" + assert "gen_ai.tool.input" in tool_span["attributes"] + assert "gen_ai.tool.output" in tool_span["attributes"] + + # Check chat spans have available_tools + for chat_span in chat_spans: + assert "gen_ai.request.available_tools" in chat_span["attributes"] + available_tools_str = chat_span["attributes"][ + "gen_ai.request.available_tools" + ] + + # Available tools is serialized as a string + assert "add_numbers" in available_tools_str + else: + events = capture_events() + + result = await test_agent.run("What is 5 + 3?") + + assert result is not None - tool_span = tool_spans[1] - assert "execute_tool" in tool_span["name"] - assert tool_span["attributes"]["gen_ai.operation.name"] == "execute_tool" - assert tool_span["attributes"]["gen_ai.tool.name"] == 
"add_numbers" - assert "gen_ai.tool.input" in tool_span["attributes"] - assert "gen_ai.tool.output" in tool_span["attributes"] + if handled_tool_call_exceptions: + (error, transaction) = events + else: + (transaction,) = events + spans = transaction["spans"] - # Check chat spans have available_tools - for chat_span in chat_spans: - assert "gen_ai.request.available_tools" in chat_span["attributes"] - available_tools_str = chat_span["attributes"]["gen_ai.request.available_tools"] - # Available tools is serialized as a string - assert "add_numbers" in available_tools_str + if handled_tool_call_exceptions: + assert error["level"] == "error" + assert error["exception"]["values"][0]["mechanism"]["handled"] + # Find child span types (invoke_agent is the transaction, not a child span) + chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + tool_spans = [s for s in spans if s["op"] == "gen_ai.execute_tool"] + # Should have tool spans + assert len(tool_spans) >= 1 + + # Check tool spans + model_retry_tool_span = tool_spans[0] + assert "execute_tool" in model_retry_tool_span["description"] + assert model_retry_tool_span["data"]["gen_ai.operation.name"] == "execute_tool" + assert model_retry_tool_span["data"]["gen_ai.tool.name"] == "add_numbers" + assert "gen_ai.tool.input" in model_retry_tool_span["data"] + + tool_span = tool_spans[1] + assert "execute_tool" in tool_span["description"] + assert tool_span["data"]["gen_ai.operation.name"] == "execute_tool" + assert tool_span["data"]["gen_ai.tool.name"] == "add_numbers" + assert "gen_ai.tool.input" in tool_span["data"] + assert "gen_ai.tool.output" in tool_span["data"] + + # Check chat spans have available_tools + for chat_span in chat_spans: + assert "gen_ai.request.available_tools" in chat_span["data"] + available_tools_str = chat_span["data"]["gen_ai.request.available_tools"] + # Available tools is serialized as a string + assert "add_numbers" in available_tools_str + + +@pytest.mark.parametrize("stream_gen_ai_spans", 
[True, False]) @pytest.mark.parametrize( "handled_tool_call_exceptions", [False, True], ) @pytest.mark.asyncio async def test_agent_with_tool_validation_error( - sentry_init, capture_items, get_test_agent, handled_tool_call_exceptions + sentry_init, + capture_events, + capture_items, + get_test_agent, + handled_tool_call_exceptions, + stream_gen_ai_spans, ): """ Test that a handled exception is captured when a tool has unsatisfiable constraints. @@ -470,6 +790,7 @@ async def test_agent_with_tool_validation_error( ], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) test_agent = get_test_agent() @@ -479,54 +800,109 @@ def add_numbers(a: Annotated[int, Field(gt=0, lt=0)], b: int) -> int: """Add two numbers together.""" return a + b - items = capture_items("event", "transaction", "span") + if stream_gen_ai_spans: + items = capture_items("event", "transaction", "span") - result = None - with pytest.raises(UnexpectedModelBehavior): - result = await test_agent.run("What is 5 + 3?") + result = None + with pytest.raises(UnexpectedModelBehavior): + result = await test_agent.run("What is 5 + 3?") - assert result is None + assert result is None - if handled_tool_call_exceptions: - ( - error, - model_behaviour_error, - ) = (item.payload for item in items if item.type == "event") - assert error["level"] == "error" - assert error["exception"]["values"][0]["mechanism"]["handled"] + if handled_tool_call_exceptions: + ( + error, + model_behaviour_error, + ) = (item.payload for item in items if item.type == "event") - spans = [item.payload for item in items if item.type == "span"] - chat_spans = [ - s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" - ] - tool_spans = [ - s - for s in spans - if s["attributes"].get("sentry.op", "") == "gen_ai.execute_tool" - ] - - # Should have tool spans - assert len(tool_spans) >= 1 + assert error["level"] == "error" + assert 
error["exception"]["values"][0]["mechanism"]["handled"] - # Check tool spans - model_retry_tool_span = tool_spans[0] - assert "execute_tool" in model_retry_tool_span["name"] - assert ( - model_retry_tool_span["attributes"]["gen_ai.operation.name"] == "execute_tool" - ) - assert model_retry_tool_span["attributes"]["gen_ai.tool.name"] == "add_numbers" - assert "gen_ai.tool.input" in model_retry_tool_span["attributes"] - - # Check chat spans have available_tools - for chat_span in chat_spans: - assert "gen_ai.request.available_tools" in chat_span["attributes"] - available_tools_str = chat_span["attributes"]["gen_ai.request.available_tools"] - # Available tools is serialized as a string - assert "add_numbers" in available_tools_str + spans = [item.payload for item in items if item.type == "span"] + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] + tool_spans = [ + s + for s in spans + if s["attributes"].get("sentry.op", "") == "gen_ai.execute_tool" + ] + # Should have tool spans + assert len(tool_spans) >= 1 + # Check tool spans + model_retry_tool_span = tool_spans[0] + assert "execute_tool" in model_retry_tool_span["name"] + assert ( + model_retry_tool_span["attributes"]["gen_ai.operation.name"] + == "execute_tool" + ) + assert model_retry_tool_span["attributes"]["gen_ai.tool.name"] == "add_numbers" + assert "gen_ai.tool.input" in model_retry_tool_span["attributes"] + + # Check chat spans have available_tools + for chat_span in chat_spans: + assert "gen_ai.request.available_tools" in chat_span["attributes"] + available_tools_str = chat_span["attributes"][ + "gen_ai.request.available_tools" + ] + + # Available tools is serialized as a string + assert "add_numbers" in available_tools_str + else: + events = capture_events() + + result = None + with pytest.raises(UnexpectedModelBehavior): + result = await test_agent.run("What is 5 + 3?") + + assert result is None + + if handled_tool_call_exceptions: + (error, 
model_behaviour_error, transaction) = events + else: + ( + model_behaviour_error, + transaction, + ) = events + spans = transaction["spans"] + + if handled_tool_call_exceptions: + assert error["level"] == "error" + assert error["exception"]["values"][0]["mechanism"]["handled"] + + # Find child span types (invoke_agent is the transaction, not a child span) + chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + tool_spans = [s for s in spans if s["op"] == "gen_ai.execute_tool"] + + # Should have tool spans + assert len(tool_spans) >= 1 + + # Check tool spans + model_retry_tool_span = tool_spans[0] + assert "execute_tool" in model_retry_tool_span["description"] + assert model_retry_tool_span["data"]["gen_ai.operation.name"] == "execute_tool" + assert model_retry_tool_span["data"]["gen_ai.tool.name"] == "add_numbers" + assert "gen_ai.tool.input" in model_retry_tool_span["data"] + + # Check chat spans have available_tools + for chat_span in chat_spans: + assert "gen_ai.request.available_tools" in chat_span["data"] + available_tools_str = chat_span["data"]["gen_ai.request.available_tools"] + # Available tools is serialized as a string + assert "add_numbers" in available_tools_str + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio -async def test_agent_with_tools_streaming(sentry_init, capture_items, get_test_agent): +async def test_agent_with_tools_streaming( + sentry_init, + capture_events, + capture_items, + get_test_agent, + stream_gen_ai_spans, +): """ Test that tool execution works correctly with streaming. 
""" @@ -534,6 +910,7 @@ async def test_agent_with_tools_streaming(sentry_init, capture_items, get_test_a integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) test_agent = get_test_agent() @@ -543,68 +920,123 @@ def multiply(a: int, b: int) -> int: """Multiply two numbers.""" return a * b - items = capture_items("transaction", "span") + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - async with test_agent.run_stream("What is 7 times 8?") as result: - async for _ in result.stream_output(): - pass + async with test_agent.run_stream("What is 7 times 8?") as result: + async for _ in result.stream_output(): + pass - spans = [item.payload for item in items if item.type == "span"] + spans = [item.payload for item in items if item.type == "span"] - # Find span types - chat_spans = [ - s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" - ] - tool_spans = [ - s - for s in spans - if s["attributes"].get("sentry.op", "") == "gen_ai.execute_tool" - ] + # Find span types + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] + tool_spans = [ + s + for s in spans + if s["attributes"].get("sentry.op", "") == "gen_ai.execute_tool" + ] + + # Should have tool spans + assert len(tool_spans) >= 1 + + # Verify streaming flag is True + for chat_span in chat_spans: + assert chat_span["attributes"]["gen_ai.response.streaming"] is True - # Should have tool spans - assert len(tool_spans) >= 1 + # Check tool span + tool_span = tool_spans[0] + assert tool_span["attributes"]["gen_ai.tool.name"] == "multiply" + assert "gen_ai.tool.input" in tool_span["attributes"] + assert "gen_ai.tool.output" in tool_span["attributes"] + else: + events = capture_events() + + async with test_agent.run_stream("What is 7 times 8?") as result: + async for _ in result.stream_output(): + pass + + (transaction,) = events + spans = 
transaction["spans"] + + # Find span types + chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + tool_spans = [s for s in spans if s["op"] == "gen_ai.execute_tool"] + + # Should have tool spans + assert len(tool_spans) >= 1 - # Verify streaming flag is True - for chat_span in chat_spans: - assert chat_span["attributes"]["gen_ai.response.streaming"] is True + # Verify streaming flag is True + for chat_span in chat_spans: + assert chat_span["data"]["gen_ai.response.streaming"] is True - # Check tool span - tool_span = tool_spans[0] - assert tool_span["attributes"]["gen_ai.tool.name"] == "multiply" - assert "gen_ai.tool.input" in tool_span["attributes"] - assert "gen_ai.tool.output" in tool_span["attributes"] + # Check tool span + tool_span = tool_spans[0] + assert tool_span["data"]["gen_ai.tool.name"] == "multiply" + assert "gen_ai.tool.input" in tool_span["data"] + assert "gen_ai.tool.output" in tool_span["data"] +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio -async def test_model_settings(sentry_init, capture_items, get_test_agent_with_settings): +async def test_model_settings( + sentry_init, + capture_events, + capture_items, + get_test_agent_with_settings, + stream_gen_ai_spans, +): """ Test that model settings are captured in spans. 
""" sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") - test_agent_with_settings = get_test_agent_with_settings() - await test_agent_with_settings.run("Test input") - spans = [item.payload for item in items if item.type == "span"] + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - # Find chat span - chat_spans = [ - s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" - ] - assert len(chat_spans) >= 1 + await test_agent_with_settings.run("Test input") - chat_span = chat_spans[0] - # Check that model settings are captured - assert chat_span["attributes"].get("gen_ai.request.temperature") == 0.7 - assert chat_span["attributes"].get("gen_ai.request.max_tokens") == 100 - assert chat_span["attributes"].get("gen_ai.request.top_p") == 0.9 + spans = [item.payload for item in items if item.type == "span"] + # Find chat span + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] + assert len(chat_spans) >= 1 + + chat_span = chat_spans[0] + # Check that model settings are captured + assert chat_span["attributes"].get("gen_ai.request.temperature") == 0.7 + assert chat_span["attributes"].get("gen_ai.request.max_tokens") == 100 + assert chat_span["attributes"].get("gen_ai.request.top_p") == 0.9 + else: + events = capture_events() + + await test_agent_with_settings.run("Test input") + + (transaction,) = events + spans = transaction["spans"] + + # Find chat span + chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + assert len(chat_spans) >= 1 + + chat_span = chat_spans[0] + # Check that model settings are captured + assert chat_span["data"].get("gen_ai.request.temperature") == 0.7 + assert chat_span["data"].get("gen_ai.request.max_tokens") == 100 + assert chat_span["data"].get("gen_ai.request.top_p") == 0.9 + 
+@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio @pytest.mark.parametrize( "send_default_pii, include_prompts", @@ -616,7 +1048,12 @@ async def test_model_settings(sentry_init, capture_items, get_test_agent_with_se ], ) async def test_system_prompt_attribute( - sentry_init, capture_items, send_default_pii, include_prompts + sentry_init, + capture_events, + capture_items, + send_default_pii, + include_prompts, + stream_gen_ai_spans, ): """ Test that system prompts are included as the first message. @@ -631,38 +1068,72 @@ async def test_system_prompt_attribute( integrations=[PydanticAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - await agent.run("Hello") + await agent.run("Hello") - spans = [item.payload for item in items if item.type == "span"] + spans = [item.payload for item in items if item.type == "span"] - # The transaction IS the invoke_agent span, check for messages in chat spans instead - chat_spans = [ - s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" - ] - assert len(chat_spans) >= 1 + # The transaction IS the invoke_agent span, check for messages in chat spans instead + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] - chat_span = chat_spans[0] + assert len(chat_spans) >= 1 - if send_default_pii and include_prompts: - system_instructions = chat_span["attributes"][ - SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS - ] - assert json.loads(system_instructions) == [ - { - "type": "text", - "content": "You are a helpful assistant specialized in testing.", - } - ] + chat_span = chat_spans[0] + + if send_default_pii and include_prompts: + system_instructions = chat_span["attributes"][ + SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS + ] 
+ assert json.loads(system_instructions) == [ + { + "type": "text", + "content": "You are a helpful assistant specialized in testing.", + } + ] + else: + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_span["attributes"] else: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_span["attributes"] + events = capture_events() + + await agent.run("Hello") + + (transaction,) = events + spans = transaction["spans"] + # The transaction IS the invoke_agent span, check for messages in chat spans instead + chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + assert len(chat_spans) >= 1 + + chat_span = chat_spans[0] + + if send_default_pii and include_prompts: + system_instructions = chat_span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + assert json.loads(system_instructions) == [ + { + "type": "text", + "content": "You are a helpful assistant specialized in testing.", + } + ] + else: + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_span["data"] + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio -async def test_error_handling(sentry_init, capture_items): +async def test_error_handling( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """ Test error handling in agent execution. 
""" @@ -676,23 +1147,42 @@ async def test_error_handling(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - # Simple run that should succeed - await agent.run("Hello") + # Simple run that should succeed + await agent.run("Hello") + + # At minimum, we should have a transaction + transaction = next(item.payload for item in items if item.type == "transaction") + else: + events = capture_events() + + # Simple run that should succeed + await agent.run("Hello") + + # At minimum, we should have a transaction + assert len(events) >= 1 + transaction = [e for e in events if e.get("type") == "transaction"][0] - # At minimum, we should have a transaction - transaction = next(item.payload for item in items if item.type == "transaction") assert transaction["transaction"] == "invoke_agent test_error" # Transaction should complete successfully (status key may not exist if no error) trace_status = transaction["contexts"]["trace"].get("status") assert trace_status != "error" # Could be None or some other status +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio -async def test_without_pii(sentry_init, capture_items, get_test_agent): +async def test_without_pii( + sentry_init, + capture_events, + capture_items, + get_test_agent, + stream_gen_ai_spans, +): """ Test that PII is not captured when send_default_pii is False. 
""" @@ -700,28 +1190,53 @@ async def test_without_pii(sentry_init, capture_items, get_test_agent): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=False, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - test_agent = get_test_agent() - await test_agent.run("Sensitive input") + test_agent = get_test_agent() + await test_agent.run("Sensitive input") - spans = [item.payload for item in items if item.type == "span"] + spans = [item.payload for item in items if item.type == "span"] - # Find child spans (invoke_agent is the transaction, not a child span) - chat_spans = [ - s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" - ] + # Find child spans (invoke_agent is the transaction, not a child span) + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] + + # Verify that messages and response text are not captured + for span in chat_spans: + assert "gen_ai.request.messages" not in span["attributes"] + assert "gen_ai.response.text" not in span["attributes"] + else: + events = capture_events() - # Verify that messages and response text are not captured - for span in chat_spans: - assert "gen_ai.request.messages" not in span["attributes"] - assert "gen_ai.response.text" not in span["attributes"] + test_agent = get_test_agent() + await test_agent.run("Sensitive input") + (transaction,) = events + spans = transaction["spans"] + # Find child spans (invoke_agent is the transaction, not a child span) + chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + + # Verify that messages and response text are not captured + for span in chat_spans: + assert "gen_ai.request.messages" not in span["data"] + assert "gen_ai.response.text" not in span["data"] + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio -async def 
test_without_pii_tools(sentry_init, capture_items, get_test_agent): +async def test_without_pii_tools( + sentry_init, + capture_events, + capture_items, + get_test_agent, + stream_gen_ai_spans, +): """ Test that tool input/output are not captured when send_default_pii is False. """ @@ -729,6 +1244,7 @@ async def test_without_pii_tools(sentry_init, capture_items, get_test_agent): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=False, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) test_agent = get_test_agent() @@ -738,56 +1254,103 @@ def sensitive_tool(data: str) -> str: """A tool with sensitive data.""" return f"Processed: {data}" - items = capture_items("transaction", "span") + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - await test_agent.run("Use sensitive tool with private data") + await test_agent.run("Use sensitive tool with private data") - spans = [item.payload for item in items if item.type == "span"] + spans = [item.payload for item in items if item.type == "span"] - # Find tool spans - tool_spans = [ - s - for s in spans - if s["attributes"].get("sentry.op", "") == "gen_ai.execute_tool" - ] + # Find tool spans + tool_spans = [ + s + for s in spans + if s["attributes"].get("sentry.op", "") == "gen_ai.execute_tool" + ] + + # If tool was executed, verify input/output are not captured + for tool_span in tool_spans: + assert "gen_ai.tool.input" not in tool_span["attributes"] + assert "gen_ai.tool.output" not in tool_span["attributes"] + else: + events = capture_events() + + await test_agent.run("Use sensitive tool with private data") - # If tool was executed, verify input/output are not captured - for tool_span in tool_spans: - assert "gen_ai.tool.input" not in tool_span["attributes"] - assert "gen_ai.tool.output" not in tool_span["attributes"] + (transaction,) = events + spans = transaction["spans"] + # Find tool spans + tool_spans = [s for s in spans if s["op"] == 
"gen_ai.execute_tool"] + # If tool was executed, verify input/output are not captured + for tool_span in tool_spans: + assert "gen_ai.tool.input" not in tool_span["data"] + assert "gen_ai.tool.output" not in tool_span["data"] + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio -async def test_multiple_agents_concurrent(sentry_init, capture_items, get_test_agent): +async def test_multiple_agents_concurrent( + sentry_init, + capture_events, + capture_items, + get_test_agent, + stream_gen_ai_spans, +): """ Test that multiple agents can run concurrently without interfering. """ sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") - test_agent = get_test_agent() async def run_agent(input_text): return await test_agent.run(input_text) - # Run 3 agents concurrently - results = await asyncio.gather(*[run_agent(f"Input {i}") for i in range(3)]) + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - assert len(results) == 3 + # Run 3 agents concurrently + results = await asyncio.gather(*[run_agent(f"Input {i}") for i in range(3)]) - # Verify each transaction is separate - events = [item.payload for item in items if item.type == "transaction"] - assert len(events) == 3 - for i, transaction in enumerate(events): - assert transaction["transaction"] == "invoke_agent test_agent" + assert len(results) == 3 + + # Verify each transaction is separate + events = [item.payload for item in items if item.type == "transaction"] + assert len(events) == 3 + for i, transaction in enumerate(events): + assert transaction["transaction"] == "invoke_agent test_agent" + else: + events = capture_events() + + # Run 3 agents concurrently + results = await asyncio.gather(*[run_agent(f"Input {i}") for i in range(3)]) + + assert len(results) == 3 + assert len(events) == 3 + # Verify each transaction is separate + 
for i, transaction in enumerate(events): + assert transaction["type"] == "transaction" + assert transaction["transaction"] == "invoke_agent test_agent" + # Each should have its own spans + assert len(transaction["spans"]) >= 1 + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio -async def test_message_history(sentry_init, capture_items): +async def test_message_history( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """ Test that full conversation history is captured in chat spans. """ @@ -800,10 +1363,9 @@ async def test_message_history(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") - # First message await agent.run("Hello, I'm Alice") @@ -820,58 +1382,114 @@ async def test_message_history(sentry_init, capture_items): ), ] - await agent.run("What is my name?", message_history=history) + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - # We should have 2 transactions - events = [item.payload for item in items if item.type == "transaction"] - assert len(events) >= 2 + await agent.run("What is my name?", message_history=history) - # Check the second transaction has the full history - second_transaction = events[1] - spans = second_transaction["spans"] - chat_spans = [ - s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" - ] + # We should have 2 transactions + events = [item.payload for item in items if item.type == "transaction"] - if chat_spans: - chat_span = chat_spans[0] - if "gen_ai.request.messages" in chat_span["attributes"]: - messages_data = chat_span["attributes"]["gen_ai.request.messages"] - # Should have multiple messages including history - assert len(messages_data) > 1 + # Check the second transaction has the full history + second_transaction = events[1] + spans = 
second_transaction["spans"] + + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] + + if chat_spans: + chat_span = chat_spans[0] + if "gen_ai.request.messages" in chat_span["attributes"]: + messages_data = chat_span["attributes"]["gen_ai.request.messages"] + # Should have multiple messages including history + assert len(messages_data) > 1 + else: + events = capture_events() + + await agent.run("What is my name?", message_history=history) + + # We should have 2 transactions + assert len(events) >= 2 + # Check the second transaction has the full history + second_transaction = events[1] + spans = second_transaction["spans"] + chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + + if chat_spans: + chat_span = chat_spans[0] + if "gen_ai.request.messages" in chat_span["data"]: + messages_data = chat_span["data"]["gen_ai.request.messages"] + # Should have multiple messages including history + assert len(messages_data) > 1 + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio -async def test_gen_ai_system(sentry_init, capture_items, get_test_agent): +async def test_gen_ai_system( + sentry_init, + capture_events, + capture_items, + get_test_agent, + stream_gen_ai_spans, +): """ Test that gen_ai.system is set from the model. 
""" sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") - test_agent = get_test_agent() - await test_agent.run("Test input") - spans = [item.payload for item in items if item.type == "span"] + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - # Find chat span - chat_spans = [ - s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" - ] - assert len(chat_spans) >= 1 + await test_agent.run("Test input") - chat_span = chat_spans[0] - # gen_ai.system should be set from the model (TestModel -> 'test') - assert "gen_ai.system" in chat_span["attributes"] - assert chat_span["attributes"]["gen_ai.system"] == "test" + spans = [item.payload for item in items if item.type == "span"] + + # Find chat span + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] + assert len(chat_spans) >= 1 + + chat_span = chat_spans[0] + # gen_ai.system should be set from the model (TestModel -> 'test') + assert "gen_ai.system" in chat_span["attributes"] + assert chat_span["attributes"]["gen_ai.system"] == "test" + else: + events = capture_events() + await test_agent.run("Test input") + (transaction,) = events + spans = transaction["spans"] + + # Find chat span + chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + assert len(chat_spans) >= 1 + + chat_span = chat_spans[0] + # gen_ai.system should be set from the model (TestModel -> 'test') + assert "gen_ai.system" in chat_span["data"] + assert chat_span["data"]["gen_ai.system"] == "test" + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio -async def test_include_prompts_false(sentry_init, capture_items, get_test_agent): +async def test_include_prompts_false( + sentry_init, + capture_events, + capture_items, + get_test_agent, + stream_gen_ai_spans, +): """ Test that prompts are not 
captured when include_prompts=False. """ @@ -879,28 +1497,53 @@ async def test_include_prompts_false(sentry_init, capture_items, get_test_agent) integrations=[PydanticAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=True, # Even with PII enabled, prompts should not be captured + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") - test_agent = get_test_agent() - await test_agent.run("Sensitive prompt") - spans = [item.payload for item in items if item.type == "span"] + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - # Find child spans (invoke_agent is the transaction, not a child span) - chat_spans = [ - s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" - ] + await test_agent.run("Sensitive prompt") + + spans = [item.payload for item in items if item.type == "span"] - # Verify that messages and response text are not captured - for span in chat_spans: - assert "gen_ai.request.messages" not in span["attributes"] - assert "gen_ai.response.text" not in span["attributes"] + # Find child spans (invoke_agent is the transaction, not a child span) + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] + + # Verify that messages and response text are not captured + for span in chat_spans: + assert "gen_ai.request.messages" not in span["attributes"] + assert "gen_ai.response.text" not in span["attributes"] + else: + events = capture_events() + + await test_agent.run("Sensitive prompt") + + (transaction,) = events + spans = transaction["spans"] + # Find child spans (invoke_agent is the transaction, not a child span) + chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + # Verify that messages and response text are not captured + for span in chat_spans: + assert "gen_ai.request.messages" not in span["data"] + assert "gen_ai.response.text" not in span["data"] + + 
+@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio -async def test_include_prompts_true(sentry_init, capture_items, get_test_agent): +async def test_include_prompts_true( + sentry_init, + capture_events, + capture_items, + get_test_agent, + stream_gen_ai_spans, +): """ Test that prompts are captured when include_prompts=True (default). """ @@ -908,29 +1551,52 @@ async def test_include_prompts_true(sentry_init, capture_items, get_test_agent): integrations=[PydanticAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") - test_agent = get_test_agent() - await test_agent.run("Test prompt") - spans = [item.payload for item in items if item.type == "span"] + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - # Find child spans (invoke_agent is the transaction, not a child span) - chat_spans = [ - s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" - ] + await test_agent.run("Test prompt") - # Verify that messages are captured in chat spans - assert len(chat_spans) >= 1 - for chat_span in chat_spans: - assert "gen_ai.request.messages" in chat_span["attributes"] + spans = [item.payload for item in items if item.type == "span"] + + # Find child spans (invoke_agent is the transaction, not a child span) + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] + + # Verify that messages are captured in chat spans + assert len(chat_spans) >= 1 + for chat_span in chat_spans: + assert "gen_ai.request.messages" in chat_span["attributes"] + else: + events = capture_events() + + await test_agent.run("Test prompt") + (transaction,) = events + spans = transaction["spans"] + # Find child spans (invoke_agent is the transaction, not a child span) + chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + + # Verify that messages 
are captured in chat spans + assert len(chat_spans) >= 1 + for chat_span in chat_spans: + assert "gen_ai.request.messages" in chat_span["data"] + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_include_prompts_false_with_tools( - sentry_init, capture_items, get_test_agent + sentry_init, + capture_events, + capture_items, + get_test_agent, + stream_gen_ai_spans, ): """ Test that tool input/output are not captured when include_prompts=False. @@ -939,6 +1605,7 @@ async def test_include_prompts_false_with_tools( integrations=[PydanticAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) test_agent = get_test_agent() @@ -948,27 +1615,50 @@ def test_tool(value: int) -> int: """A test tool.""" return value * 2 - items = capture_items("transaction", "span") + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - await test_agent.run("Use the test tool with value 5") + await test_agent.run("Use the test tool with value 5") - spans = [item.payload for item in items if item.type == "span"] + spans = [item.payload for item in items if item.type == "span"] - # Find tool spans - tool_spans = [ - s - for s in spans - if s["attributes"].get("sentry.op", "") == "gen_ai.execute_tool" - ] + # Find tool spans + tool_spans = [ + s + for s in spans + if s["attributes"].get("sentry.op", "") == "gen_ai.execute_tool" + ] + + # If tool was executed, verify input/output are not captured + for tool_span in tool_spans: + assert "gen_ai.tool.input" not in tool_span["attributes"] + assert "gen_ai.tool.output" not in tool_span["attributes"] + else: + events = capture_events() + + await test_agent.run("Use the test tool with value 5") - # If tool was executed, verify input/output are not captured - for tool_span in tool_spans: - assert "gen_ai.tool.input" not in tool_span["attributes"] - assert "gen_ai.tool.output" not in 
tool_span["attributes"] + (transaction,) = events + spans = transaction["spans"] + # Find tool spans + tool_spans = [s for s in spans if s["op"] == "gen_ai.execute_tool"] + # If tool was executed, verify input/output are not captured + for tool_span in tool_spans: + assert "gen_ai.tool.input" not in tool_span["data"] + assert "gen_ai.tool.output" not in tool_span["data"] + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio -async def test_include_prompts_requires_pii(sentry_init, capture_items, get_test_agent): +async def test_include_prompts_requires_pii( + sentry_init, + capture_events, + capture_items, + get_test_agent, + stream_gen_ai_spans, +): """ Test that include_prompts requires send_default_pii=True. """ @@ -976,28 +1666,52 @@ async def test_include_prompts_requires_pii(sentry_init, capture_items, get_test integrations=[PydanticAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=False, # PII disabled + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") - test_agent = get_test_agent() - await test_agent.run("Test prompt") - spans = [item.payload for item in items if item.type == "span"] + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - # Find child spans (invoke_agent is the transaction, not a child span) - chat_spans = [ - s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" - ] + await test_agent.run("Test prompt") + + spans = [item.payload for item in items if item.type == "span"] + + # Find child spans (invoke_agent is the transaction, not a child span) + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] - # Even with include_prompts=True, if PII is disabled, messages should not be captured - for span in chat_spans: - assert "gen_ai.request.messages" not in span["attributes"] - assert "gen_ai.response.text" not in span["attributes"] + # Even 
with include_prompts=True, if PII is disabled, messages should not be captured + for span in chat_spans: + assert "gen_ai.request.messages" not in span["attributes"] + assert "gen_ai.response.text" not in span["attributes"] + else: + events = capture_events() + + await test_agent.run("Test prompt") + + (transaction,) = events + spans = transaction["spans"] + + # Find child spans (invoke_agent is the transaction, not a child span) + chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + # Even with include_prompts=True, if PII is disabled, messages should not be captured + for span in chat_spans: + assert "gen_ai.request.messages" not in span["data"] + assert "gen_ai.response.text" not in span["data"] + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio -async def test_mcp_tool_execution_spans(sentry_init, capture_items): +async def test_mcp_tool_execution_spans( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """ Test that MCP (Model Context Protocol) tool calls create execute_tool spans. 
@@ -1067,54 +1781,113 @@ async def mock_map_tool_result_part(part): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + # Simulate MCP tool execution within a transaction through CombinedToolset + with sentry_sdk.start_transaction( + op="ai.run", name="invoke_agent test_mcp_agent" + ): + # Set up the agent context + scope = sentry_sdk.get_current_scope() + scope._contexts["pydantic_ai_agent"] = { + "_agent": agent, + } - # Simulate MCP tool execution within a transaction through CombinedToolset - with sentry_sdk.start_transaction(op="ai.run", name="invoke_agent test_mcp_agent"): - # Set up the agent context - scope = sentry_sdk.get_current_scope() - scope._contexts["pydantic_ai_agent"] = { - "_agent": agent, - } + # Create a mock tool that simulates an MCP tool from CombinedToolset + from pydantic_ai._run_context import RunContext + from pydantic_ai.result import RunUsage + from pydantic_ai.models.test import TestModel + from pydantic_ai.toolsets.combined import _CombinedToolsetTool + + ctx = RunContext( + deps=None, + model=TestModel(), + usage=RunUsage(), + retry=0, + tool_name="test_mcp_tool", + ) - # Create a mock tool that simulates an MCP tool from CombinedToolset - from pydantic_ai._run_context import RunContext - from pydantic_ai.result import RunUsage - from pydantic_ai.models.test import TestModel - from pydantic_ai.toolsets.combined import _CombinedToolsetTool - - ctx = RunContext( - deps=None, - model=TestModel(), - usage=RunUsage(), - retry=0, - tool_name="test_mcp_tool", - ) + tool_name = "test_mcp_tool" + + # Create a tool that points to the MCP server + # This simulates how CombinedToolset wraps tools from different sources + tool = _CombinedToolsetTool( + toolset=combined, + tool_def=MagicMock(name=tool_name), + max_retries=0, + 
args_validator=MagicMock(), + source_toolset=mock_server, + source_tool=MagicMock(), + ) - tool_name = "test_mcp_tool" - - # Create a tool that points to the MCP server - # This simulates how CombinedToolset wraps tools from different sources - tool = _CombinedToolsetTool( - toolset=combined, - tool_def=MagicMock(name=tool_name), - max_retries=0, - args_validator=MagicMock(), - source_toolset=mock_server, - source_tool=MagicMock(), - ) + try: + await combined.call_tool(tool_name, {"query": "test"}, ctx, tool) + except Exception: + # MCP tool might raise if not fully mocked, that's okay + pass - try: - await combined.call_tool(tool_name, {"query": "test"}, ctx, tool) - except Exception: - # MCP tool might raise if not fully mocked, that's okay - pass + events_list = items + if len(events_list) == 0: + pytest.skip("No events captured, MCP test setup incomplete") + + (transaction,) = events_list + transaction["spans"] + else: + events = capture_events() + + # Simulate MCP tool execution within a transaction through CombinedToolset + with sentry_sdk.start_transaction( + op="ai.run", name="invoke_agent test_mcp_agent" + ) as transaction: + # Set up the agent context + scope = sentry_sdk.get_current_scope() + scope._contexts["pydantic_ai_agent"] = { + "_agent": agent, + } + + # Create a mock tool that simulates an MCP tool from CombinedToolset + from pydantic_ai._run_context import RunContext + from pydantic_ai.result import RunUsage + from pydantic_ai.models.test import TestModel + from pydantic_ai.toolsets.combined import _CombinedToolsetTool + + ctx = RunContext( + deps=None, + model=TestModel(), + usage=RunUsage(), + retry=0, + tool_name="test_mcp_tool", + ) + + tool_name = "test_mcp_tool" + + # Create a tool that points to the MCP server + # This simulates how CombinedToolset wraps tools from different sources + tool = _CombinedToolsetTool( + toolset=combined, + tool_def=MagicMock(name=tool_name), + max_retries=0, + args_validator=MagicMock(), + 
source_toolset=mock_server, + source_tool=MagicMock(), + ) + + try: + await combined.call_tool(tool_name, {"query": "test"}, ctx, tool) + except Exception: + # MCP tool might raise if not fully mocked, that's okay + pass - events_list = items - if len(events_list) == 0: - pytest.skip("No events captured, MCP test setup incomplete") + events_list = events + if len(events_list) == 0: + pytest.skip("No events captured, MCP test setup incomplete") + + (transaction,) = events_list + transaction["spans"] # Note: This test manually calls combined.call_tool which doesn't go through # ToolManager._call_tool (which is what the integration patches). @@ -1284,8 +2057,14 @@ async def run_and_check_context(agent, agent_name): # ==================== Additional Coverage Tests ==================== +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio -async def test_invoke_agent_with_list_user_prompt(sentry_init, capture_items): +async def test_invoke_agent_with_list_user_prompt( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """ Test that invoke_agent span handles list user prompts correctly. 
""" @@ -1298,16 +2077,26 @@ async def test_invoke_agent_with_list_user_prompt(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - # Use a list as user prompt - await agent.run(["First part", "Second part"]) + # Use a list as user prompt + await agent.run(["First part", "Second part"]) + + (transaction,) = [item.payload for item in items if item.type == "transaction"] + else: + events = capture_events() + + # Use a list as user prompt + await agent.run(["First part", "Second part"]) + + (transaction,) = events # Check that the invoke_agent transaction has messages data # The invoke_agent is the transaction itself - (transaction,) = [item.payload for item in items if item.type == "transaction"] if "gen_ai.request.messages" in transaction["contexts"]["trace"]["data"]: messages_str = transaction["contexts"]["trace"]["data"][ "gen_ai.request.messages" @@ -1316,6 +2105,7 @@ async def test_invoke_agent_with_list_user_prompt(sentry_init, capture_items): assert "Second part" in messages_str +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio @pytest.mark.parametrize( "send_default_pii, include_prompts", @@ -1327,7 +2117,12 @@ async def test_invoke_agent_with_list_user_prompt(sentry_init, capture_items): ], ) async def test_invoke_agent_with_instructions( - sentry_init, capture_items, send_default_pii, include_prompts + sentry_init, + capture_events, + capture_items, + send_default_pii, + include_prompts, + stream_gen_ai_spans, ): """ Test that invoke_agent span handles instructions correctly. 
@@ -1348,32 +2143,59 @@ async def test_invoke_agent_with_instructions( integrations=[PydanticAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - await agent.run("Test input") + await agent.run("Test input") - spans = [item.payload for item in items if item.type == "span"] + spans = [item.payload for item in items if item.type == "span"] - # The transaction IS the invoke_agent span, check for messages in chat spans instead - chat_spans = [ - s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" - ] - assert len(chat_spans) >= 1 + # The transaction IS the invoke_agent span, check for messages in chat spans instead + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] - chat_span = chat_spans[0] + assert len(chat_spans) >= 1 + + chat_span = chat_spans[0] + + if send_default_pii and include_prompts: + system_instructions = chat_span["attributes"][ + SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS + ] + assert json.loads(system_instructions) == [ + {"type": "text", "content": "System prompt"}, + {"type": "text", "content": "Instruction 1\nInstruction 2"}, + ] + else: + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_span["attributes"] - if send_default_pii and include_prompts: - system_instructions = chat_span["attributes"][ - SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS - ] - assert json.loads(system_instructions) == [ - {"type": "text", "content": "System prompt"}, - {"type": "text", "content": "Instruction 1\nInstruction 2"}, - ] else: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_span["attributes"] + events = capture_events() + + await agent.run("Test input") + + (transaction,) = events + spans = transaction["spans"] + + # The transaction IS the invoke_agent span, check 
for messages in chat spans instead + chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + + assert len(chat_spans) >= 1 + + chat_span = chat_spans[0] + + if send_default_pii and include_prompts: + system_instructions = chat_span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + assert json.loads(system_instructions) == [ + {"type": "text", "content": "System prompt"}, + {"type": "text", "content": "Instruction 1\nInstruction 2"}, + ] + else: + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_span["data"] @pytest.mark.asyncio @@ -1463,8 +2285,14 @@ async def test_model_settings_object_style(sentry_init, capture_items): assert transaction is not None +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio -async def test_usage_data_partial(sentry_init, capture_items): +async def test_usage_data_partial( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """ Test that usage data is correctly handled when only some fields are present. 
""" @@ -1476,17 +2304,29 @@ async def test_usage_data_partial(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + await agent.run("Test input") - await agent.run("Test input") + spans = [item.payload for item in items if item.type == "span"] - spans = [item.payload for item in items if item.type == "span"] + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] + else: + events = capture_events() + + await agent.run("Test input") + + (transaction,) = events + spans = transaction["spans"] + + chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] - chat_spans = [ - s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" - ] assert len(chat_spans) >= 1 # Check that usage data fields exist (they may or may not be set depending on TestModel) @@ -1495,8 +2335,14 @@ async def test_usage_data_partial(sentry_init, capture_items): assert chat_span is not None +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio -async def test_agent_data_from_scope(sentry_init, capture_items): +async def test_agent_data_from_scope( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """ Test that agent data can be retrieved from Sentry scope when not passed directly. 
""" @@ -1509,21 +2355,38 @@ sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + # The integration automatically sets agent in scope during execution + await agent.run("Test input") + + # Verify agent name is captured + (transaction,) = (item.payload for item in items if item.type == "transaction") + else: + events = capture_events() + + # The integration automatically sets agent in scope during execution + await agent.run("Test input") - # The integration automatically sets agent in scope during execution - await agent.run("Test input") + # Verify agent name is captured + (transaction,) = events - # Verify agent name is capture - (transaction,) = (item.payload for item in items if item.type == "transaction") + # Verify agent name is captured assert transaction["transaction"] == "invoke_agent test_scope_agent" +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_available_tools_without_description( - sentry_init, capture_items, get_test_agent + sentry_init, + capture_events, + capture_items, + get_test_agent, + stream_gen_ai_spans, ): """ Test that available tools are captured even when description is missing. 
@@ -1531,6 +2394,7 @@ async def test_available_tools_without_description( sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) test_agent = get_test_agent() @@ -1540,24 +2404,46 @@ def tool_without_desc(x: int) -> int: # No docstring = no description return x * 2 - items = capture_items("transaction", "span") + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - await test_agent.run("Use the tool with 5") + await test_agent.run("Use the tool with 5") - spans = [item.payload for item in items if item.type == "span"] + spans = [item.payload for item in items if item.type == "span"] - chat_spans = [ - s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" - ] - if chat_spans: - chat_span = chat_spans[0] - if "gen_ai.request.available_tools" in chat_span["attributes"]: - tools_str = chat_span["attributes"]["gen_ai.request.available_tools"] - assert "tool_without_desc" in tools_str + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] + if chat_spans: + chat_span = chat_spans[0] + if "gen_ai.request.available_tools" in chat_span["attributes"]: + tools_str = chat_span["attributes"]["gen_ai.request.available_tools"] + assert "tool_without_desc" in tools_str + else: + events = capture_events() + + await test_agent.run("Use the tool with 5") + (transaction,) = events + spans = transaction["spans"] + chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + if chat_spans: + chat_span = chat_spans[0] + if "gen_ai.request.available_tools" in chat_span["data"]: + tools_str = chat_span["data"]["gen_ai.request.available_tools"] + assert "tool_without_desc" in tools_str + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio -async def test_output_with_tool_calls(sentry_init, capture_items, get_test_agent): +async def test_output_with_tool_calls( + sentry_init, + 
capture_events, + capture_items, + get_test_agent, + stream_gen_ai_spans, +): """ Test that tool calls in model response are captured correctly. """ @@ -1565,6 +2451,7 @@ async def test_output_with_tool_calls(sentry_init, capture_items, get_test_agent integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) test_agent = get_test_agent() @@ -1574,28 +2461,53 @@ def calc_tool(value: int) -> int: """Calculate something.""" return value + 10 - items = capture_items("transaction", "span") + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - await test_agent.run("Use calc_tool with 5") + await test_agent.run("Use calc_tool with 5") - spans = [item.payload for item in items if item.type == "span"] + spans = [item.payload for item in items if item.type == "span"] - chat_spans = [ - s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" - ] + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] - # At least one chat span should exist - assert len(chat_spans) >= 1 + # At least one chat span should exist + assert len(chat_spans) >= 1 + + # Check if tool calls are captured in response + for chat_span in chat_spans: + # Tool calls may or may not be in response depending on TestModel behavior + # Just verify the span was created and has basic data + assert "gen_ai.operation.name" in chat_span["attributes"] + else: + events = capture_events() + + await test_agent.run("Use calc_tool with 5") + + (transaction,) = events + spans = transaction["spans"] + + chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + + # At least one chat span should exist + assert len(chat_spans) >= 1 - # Check if tool calls are captured in response - for chat_span in chat_spans: - # Tool calls may or may not be in response depending on TestModel behavior - # Just verify the span was created and has basic data - assert 
"gen_ai.operation.name" in chat_span["attributes"] + # Check if tool calls are captured in response + for chat_span in chat_spans: + # Tool calls may or may not be in response depending on TestModel behavior + # Just verify the span was created and has basic data + assert "gen_ai.operation.name" in chat_span["data"] +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio -async def test_message_formatting_with_different_parts(sentry_init, capture_items): +async def test_message_formatting_with_different_parts( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """ Test that different message part types are handled correctly in ai_client span. """ @@ -1610,10 +2522,9 @@ async def test_message_formatting_with_different_parts(sentry_init, capture_item integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") - # Create message history with different part types history = [ messages.ModelRequest(parts=[messages.UserPromptPart(content="Hello")]), @@ -1625,23 +2536,44 @@ async def test_message_formatting_with_different_parts(sentry_init, capture_item ), ] - await agent.run("What did I say?", message_history=history) + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - spans = [item.payload for item in items if item.type == "span"] + await agent.run("What did I say?", message_history=history) - chat_spans = [ - s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" - ] + spans = [item.payload for item in items if item.type == "span"] - # Should have chat spans - assert len(chat_spans) >= 1 + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] - # Check that messages are captured - chat_span = chat_spans[0] - if "gen_ai.request.messages" in chat_span["attributes"]: - messages_data = 
chat_span["attributes"]["gen_ai.request.messages"] - # Should contain message history - assert messages_data is not None + # Should have chat spans + assert len(chat_spans) >= 1 + + # Check that messages are captured + chat_span = chat_spans[0] + if "gen_ai.request.messages" in chat_span["attributes"]: + messages_data = chat_span["attributes"]["gen_ai.request.messages"] + assert messages_data is not None + else: + events = capture_events() + + await agent.run("What did I say?", message_history=history) + + (transaction,) = events + spans = transaction["spans"] + + chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + + # Should have chat spans + assert len(chat_spans) >= 1 + + # Check that messages are captured + chat_span = chat_spans[0] + if "gen_ai.request.messages" in chat_span["data"]: + messages_data = chat_span["data"]["gen_ai.request.messages"] + # Should contain message history + assert messages_data is not None @pytest.mark.asyncio @@ -1699,8 +2631,14 @@ async def test_update_ai_client_span_with_none_response(sentry_init, capture_ite assert transaction is not None +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio -async def test_agent_without_name(sentry_init, capture_items): +async def test_agent_without_name( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """ Test that agent without a name is handled correctly. 
""" @@ -1710,14 +2648,26 @@ async def test_agent_without_name(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + await agent.run("Test input") + + # Should still create transaction, just with default name + (transaction,) = (item.payload for item in items if item.type == "transaction") + else: + events = capture_events() + + await agent.run("Test input") + + (transaction,) = events - await agent.run("Test input") + # Should still create transaction, just with default name + assert transaction["type"] == "transaction" - # Should still create transaction, just with default name - (transaction,) = (item.payload for item in items if item.type == "transaction") # Transaction name should be "invoke_agent agent" or similar default assert "invoke_agent" in transaction["transaction"] @@ -1869,8 +2819,14 @@ async def test_set_usage_data_with_partial_fields(sentry_init, capture_items): assert transaction is not None +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio -async def test_message_parts_with_tool_return(sentry_init, capture_items): +async def test_message_parts_with_tool_return( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """ Test that ToolReturnPart messages are handled correctly. 
""" @@ -1890,18 +2846,30 @@ def test_tool(x: int) -> int: integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - # Run with history containing tool return - await agent.run("Use test_tool with 5") + # Run with history containing tool return + await agent.run("Use test_tool with 5") - spans = [item.payload for item in items if item.type == "span"] + spans = [item.payload for item in items if item.type == "span"] - chat_spans = [ - s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" - ] + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] + else: + events = capture_events() + + # Run with history containing tool return + await agent.run("Use test_tool with 5") + + (transaction,) = events + spans = transaction["spans"] + + chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] # Should have chat spans assert len(chat_spans) >= 1 @@ -2791,63 +3759,116 @@ def _find_binary_content(messages_data, expected_modality, expected_mime_type): return False +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio -async def test_binary_content_encoding_image(sentry_init, capture_items): +async def test_binary_content_encoding_image( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """Test that BinaryContent with image data is properly encoded in messages.""" sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - with sentry_sdk.start_transaction(op="test", name="test"): - span = sentry_sdk.start_span(op="test_span") - 
binary_content = BinaryContent( - data=b"fake_image_data_12345", media_type="image/png" - ) - user_part = UserPromptPart(content=["Look at this image:", binary_content]) - mock_msg = MagicMock() - mock_msg.parts = [user_part] - mock_msg.instructions = None + with sentry_sdk.start_transaction(op="test", name="test"): + span = sentry_sdk.start_span(op="test_span") + binary_content = BinaryContent( + data=b"fake_image_data_12345", media_type="image/png" + ) + user_part = UserPromptPart(content=["Look at this image:", binary_content]) + mock_msg = MagicMock() + mock_msg.parts = [user_part] + mock_msg.instructions = None - _set_input_messages(span, [mock_msg]) - span.finish() + _set_input_messages(span, [mock_msg]) + span.finish() - (event,) = (item.payload for item in items if item.type == "transaction") + (event,) = (item.payload for item in items if item.type == "transaction") + else: + events = capture_events() + + with sentry_sdk.start_transaction(op="test", name="test"): + span = sentry_sdk.start_span(op="test_span") + binary_content = BinaryContent( + data=b"fake_image_data_12345", media_type="image/png" + ) + user_part = UserPromptPart(content=["Look at this image:", binary_content]) + mock_msg = MagicMock() + mock_msg.parts = [user_part] + mock_msg.instructions = None + + _set_input_messages(span, [mock_msg]) + span.finish() + + (event,) = events span_data = event["spans"][0]["data"] messages_data = _get_messages_from_span(span_data) assert _find_binary_content(messages_data, "image", "image/png") +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio -async def test_binary_content_encoding_mixed_content(sentry_init, capture_items): +async def test_binary_content_encoding_mixed_content( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """Test that BinaryContent mixed with text content is properly handled.""" sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, 
send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - with sentry_sdk.start_transaction(op="test", name="test"): - span = sentry_sdk.start_span(op="test_span") - binary_content = BinaryContent( - data=b"fake_image_bytes", media_type="image/jpeg" - ) - user_part = UserPromptPart( - content=["Here is an image:", binary_content, "What do you see?"] - ) - mock_msg = MagicMock() - mock_msg.parts = [user_part] - mock_msg.instructions = None + with sentry_sdk.start_transaction(op="test", name="test"): + span = sentry_sdk.start_span(op="test_span") + binary_content = BinaryContent( + data=b"fake_image_bytes", media_type="image/jpeg" + ) + user_part = UserPromptPart( + content=["Here is an image:", binary_content, "What do you see?"] + ) + mock_msg = MagicMock() + mock_msg.parts = [user_part] + mock_msg.instructions = None - _set_input_messages(span, [mock_msg]) - span.finish() + _set_input_messages(span, [mock_msg]) + span.finish() + + (event,) = (item.payload for item in items if item.type == "transaction") + else: + events = capture_events() + + with sentry_sdk.start_transaction(op="test", name="test"): + span = sentry_sdk.start_span(op="test_span") + binary_content = BinaryContent( + data=b"fake_image_bytes", media_type="image/jpeg" + ) + user_part = UserPromptPart( + content=["Here is an image:", binary_content, "What do you see?"] + ) + mock_msg = MagicMock() + mock_msg.parts = [user_part] + mock_msg.instructions = None + + _set_input_messages(span, [mock_msg]) + span.finish() + + (event,) = events - (event,) = (item.payload for item in items if item.type == "transaction") span_data = event["spans"][0]["data"] messages_data = _get_messages_from_span(span_data) @@ -2862,8 +3883,14 @@ async def test_binary_content_encoding_mixed_content(sentry_init, capture_items) assert _find_binary_content(messages_data, "image", 
"image/jpeg") +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio -async def test_binary_content_in_agent_run(sentry_init, capture_items): +async def test_binary_content_in_agent_run( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """Test that BinaryContent in actual agent run is properly captured in spans.""" agent = Agent("test", name="test_binary_agent") @@ -2871,53 +3898,102 @@ async def test_binary_content_in_agent_run(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") binary_content = BinaryContent( data=b"fake_image_data_for_testing", media_type="image/png" ) - await agent.run(["Analyze this image:", binary_content]) - spans = [item.payload for item in items if item.type == "span"] - chat_spans = [ - s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" - ] - assert len(chat_spans) >= 1 + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - chat_span = chat_spans[0] - if "gen_ai.request.messages" in chat_span["attributes"]: - messages_str = str(chat_span["attributes"]["gen_ai.request.messages"]) - assert any(keyword in messages_str for keyword in ["blob", "image", "base64"]) + await agent.run(["Analyze this image:", binary_content]) + + spans = [item.payload for item in items if item.type == "span"] + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] + + assert len(chat_spans) >= 1 + + chat_span = chat_spans[0] + if "gen_ai.request.messages" in chat_span["attributes"]: + messages_str = str(chat_span["attributes"]["gen_ai.request.messages"]) + + assert any( + keyword in messages_str for keyword in ["blob", "image", "base64"] + ) + else: + events = capture_events() + + await agent.run(["Analyze this image:", binary_content]) + + (transaction,) 
= events + chat_spans = [s for s in transaction["spans"] if s["op"] == "gen_ai.chat"] + + assert len(chat_spans) >= 1 + + chat_span = chat_spans[0] + if "gen_ai.request.messages" in chat_span["data"]: + messages_str = str(chat_span["data"]["gen_ai.request.messages"]) + assert any( + keyword in messages_str for keyword in ["blob", "image", "base64"] + ) +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio -async def test_set_usage_data_with_cache_tokens(sentry_init, capture_items): +async def test_set_usage_data_with_cache_tokens( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """Test that cache_read_tokens and cache_write_tokens are tracked.""" sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - with sentry_sdk.start_transaction(op="test", name="test"): - span = sentry_sdk.start_span(op="test_span") - usage = RequestUsage( - input_tokens=100, - output_tokens=50, - cache_read_tokens=80, - cache_write_tokens=20, - ) - _set_usage_data(span, usage) - span.finish() + with sentry_sdk.start_transaction(op="test", name="test"): + span = sentry_sdk.start_span(op="test_span") + usage = RequestUsage( + input_tokens=100, + output_tokens=50, + cache_read_tokens=80, + cache_write_tokens=20, + ) + _set_usage_data(span, usage) + span.finish() + + (event,) = (item.payload for item in items if item.type == "transaction") + else: + events = capture_events() + + with sentry_sdk.start_transaction(op="test", name="test"): + span = sentry_sdk.start_span(op="test_span") + usage = RequestUsage( + input_tokens=100, + output_tokens=50, + cache_read_tokens=80, + cache_write_tokens=20, + ) + _set_usage_data(span, usage) + span.finish() + + (event,) = events - (event,) = (item.payload for item in items if item.type == 
"transaction") (span_data,) = event["spans"] assert span_data["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 80 assert span_data["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 20 +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( "url,image_url_kwargs,expected_content", [ @@ -2960,7 +4036,13 @@ async def test_set_usage_data_with_cache_tokens(sentry_init, capture_items): ], ) def test_image_url_base64_content_in_span( - sentry_init, capture_items, url, image_url_kwargs, expected_content + sentry_init, + capture_events, + capture_items, + url, + image_url_kwargs, + expected_content, + stream_gen_ai_spans, ): from sentry_sdk.integrations.pydantic_ai.spans.ai_client import ai_client_span @@ -2968,39 +4050,67 @@ def test_image_url_base64_content_in_span( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") + found_image = False + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with sentry_sdk.start_transaction(op="test", name="test"): + image_url = ImageUrl(url=url, **image_url_kwargs) + user_part = UserPromptPart(content=["Look at this image:", image_url]) + mock_msg = MagicMock() + mock_msg.parts = [user_part] + mock_msg.instructions = None + + span = ai_client_span([mock_msg], None, None, None) + span.finish() + + spans = [item.payload for item in items if item.type == "span"] + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] + assert len(chat_spans) >= 1 + messages_data = _get_messages_from_span(chat_spans[0]["attributes"]) - with sentry_sdk.start_transaction(op="test", name="test"): - image_url = ImageUrl(url=url, **image_url_kwargs) - user_part = UserPromptPart(content=["Look at this image:", image_url]) - mock_msg = MagicMock() - mock_msg.parts = [user_part] - mock_msg.instructions = None + 
for msg in messages_data: + if "content" not in msg: + continue + for content_item in msg["content"]: + if content_item.get("type") == "image": + found_image = True + assert content_item["content"] == expected_content + else: + events = capture_events() - span = ai_client_span([mock_msg], None, None, None) - span.finish() + with sentry_sdk.start_transaction(op="test", name="test"): + image_url = ImageUrl(url=url, **image_url_kwargs) + user_part = UserPromptPart(content=["Look at this image:", image_url]) + mock_msg = MagicMock() + mock_msg.parts = [user_part] + mock_msg.instructions = None - spans = [item.payload for item in items if item.type == "span"] - chat_spans = [ - s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" - ] - assert len(chat_spans) >= 1 - messages_data = _get_messages_from_span(chat_spans[0]["attributes"]) + span = ai_client_span([mock_msg], None, None, None) + span.finish() - found_image = False - for msg in messages_data: - if "content" not in msg: - continue - for content_item in msg["content"]: - if content_item.get("type") == "image": - found_image = True - assert content_item["content"] == expected_content + (event,) = events + chat_spans = [s for s in event["spans"] if s["op"] == "gen_ai.chat"] + assert len(chat_spans) >= 1 + messages_data = _get_messages_from_span(chat_spans[0]["data"]) + + for msg in messages_data: + if "content" not in msg: + continue + for content_item in msg["content"]: + if content_item.get("type") == "image": + found_image = True + assert content_item["content"] == expected_content assert found_image, "Image content item should be found in messages data" +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio @pytest.mark.parametrize( "url, image_url_kwargs, expected_content", @@ -3032,41 +4142,76 @@ def test_image_url_base64_content_in_span( ], ) async def test_invoke_agent_image_url( - sentry_init, capture_items, url, image_url_kwargs, expected_content + 
sentry_init, + capture_events, + capture_items, + url, + image_url_kwargs, + expected_content, + stream_gen_ai_spans, ): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) agent = Agent("test", name="test_image_url_agent") - items = capture_items("transaction", "span") image_url = ImageUrl(url=url, **image_url_kwargs) - await agent.run([image_url, "Describe this image"]) - found_image = False + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - spans = [item.payload for item in items if item.type == "span"] - chat_spans = [ - s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" - ] - for chat_span in chat_spans: - messages_data = _get_messages_from_span(chat_span["attributes"]) - for msg in messages_data: - if "content" not in msg: - continue - for content_item in msg["content"]: - if content_item.get("type") == "image": - assert content_item["content"] == expected_content - found_image = True + await agent.run([image_url, "Describe this image"]) + + found_image = False + + spans = [item.payload for item in items if item.type == "span"] + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] + for chat_span in chat_spans: + messages_data = _get_messages_from_span(chat_span["attributes"]) + for msg in messages_data: + if "content" not in msg: + continue + for content_item in msg["content"]: + if content_item.get("type") == "image": + assert content_item["content"] == expected_content + found_image = True + else: + events = capture_events() + + await agent.run([image_url, "Describe this image"]) + + (transaction,) = events + + found_image = False + + chat_spans = [s for s in transaction["spans"] if s["op"] == "gen_ai.chat"] + for chat_span in chat_spans: + messages_data = _get_messages_from_span(chat_span["data"]) + for msg in messages_data: + if "content" not in msg: + 
continue + for content_item in msg["content"]: + if content_item.get("type") == "image": + assert content_item["content"] == expected_content + found_image = True assert found_image, "Image content item should be found in messages data" +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio -async def test_tool_description_in_execute_tool_span(sentry_init, capture_items): +async def test_tool_description_in_execute_tool_span( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """ Test that tool description from the tool's docstring is included in execute_tool spans. """ @@ -3085,26 +4230,51 @@ def multiply_numbers(a: int, b: int) -> int: integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - result = await agent.run("What is 5 times 3?") - assert result is not None + result = await agent.run("What is 5 times 3?") + assert result is not None - spans = [item.payload for item in items if item.type == "span"] + spans = [item.payload for item in items if item.type == "span"] - tool_spans = [ - s - for s in spans - if s["attributes"].get("sentry.op", "") == "gen_ai.execute_tool" - ] - assert len(tool_spans) >= 1 + tool_spans = [ + s + for s in spans + if s["attributes"].get("sentry.op", "") == "gen_ai.execute_tool" + ] - tool_span = tool_spans[0] - assert tool_span["attributes"]["gen_ai.tool.name"] == "multiply_numbers" - assert SPANDATA.GEN_AI_TOOL_DESCRIPTION in tool_span["attributes"] - assert ( - "Multiply two numbers" - in tool_span["attributes"][SPANDATA.GEN_AI_TOOL_DESCRIPTION] - ) + assert len(tool_spans) >= 1 + + tool_span = tool_spans[0] + + assert tool_span["attributes"]["gen_ai.tool.name"] == "multiply_numbers" + assert SPANDATA.GEN_AI_TOOL_DESCRIPTION in tool_span["attributes"] + assert ( + 
"Multiply two numbers" + in tool_span["attributes"][SPANDATA.GEN_AI_TOOL_DESCRIPTION] + ) + else: + events = capture_events() + + result = await agent.run("What is 5 times 3?") + assert result is not None + + (transaction,) = events + spans = transaction["spans"] + + tool_spans = [s for s in spans if s["op"] == "gen_ai.execute_tool"] + + assert len(tool_spans) >= 1 + + tool_span = tool_spans[0] + + assert tool_span["data"]["gen_ai.tool.name"] == "multiply_numbers" + assert SPANDATA.GEN_AI_TOOL_DESCRIPTION in tool_span["data"] + assert ( + "Multiply two numbers" + in tool_span["data"][SPANDATA.GEN_AI_TOOL_DESCRIPTION] + ) From ab477839ff4c6eb5a2d39b8a364e678c27621b33 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 12 May 2026 14:15:12 +0200 Subject: [PATCH 46/84] cleanup anthropic --- .../integrations/anthropic/test_anthropic.py | 262 ++++++++++-------- 1 file changed, 149 insertions(+), 113 deletions(-) diff --git a/tests/integrations/anthropic/test_anthropic.py b/tests/integrations/anthropic/test_anthropic.py index 1378f777df..2e240b9c8f 100644 --- a/tests/integrations/anthropic/test_anthropic.py +++ b/tests/integrations/anthropic/test_anthropic.py @@ -3125,24 +3125,14 @@ async def test_stream_message_with_input_json_delta_async( assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" if send_default_pii and include_prompts: - if stream_gen_ai_spans: - assert ( - span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - == '[{"role": "user", "content": "What is the weather like in San Francisco?"}]' - ) - assert ( - span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] - == '{"location": "San Francisco, CA"}' - ) - else: - assert ( - span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - == '[{"role": "user", "content": "What is the weather like in San Francisco?"}]' - ) - assert ( - span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] - == '{"location": "San Francisco, CA"}' - ) + assert ( + span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + == '[{"role": "user", 
"content": "What is the weather like in San Francisco?"}]' + ) + assert ( + span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + == '{"location": "San Francisco, CA"}' + ) else: assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] @@ -3758,20 +3748,45 @@ async def test_anthropic_message_truncation_async( if stream_gen_ai_spans: items = capture_items("transaction", "span") - else: - events = capture_events() - with start_transaction(): - await client.messages.create(max_tokens=1024, messages=messages, model="model") + with start_transaction(): + await client.messages.create( + max_tokens=1024, messages=messages, model="model" + ) - if stream_gen_ai_spans: spans = [item.payload for item in items if item.type == "span"] chat_spans = [ span for span in spans if span["attributes"].get("sentry.op") == OP.GEN_AI_CHAT ] + + assert len(chat_spans) > 0 + + chat_span = chat_spans[0] + + assert chat_span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert chat_span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["attributes"] + + messages_data = chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + + assert isinstance(messages_data, str) + + parsed_messages = json.loads(messages_data) + assert isinstance(parsed_messages, list) + assert len(parsed_messages) == 1 + assert "small message 5" in str(parsed_messages[0]) + + tx = next(item.payload for item in items if item.type == "transaction") else: + events = capture_events() + + with start_transaction(): + await client.messages.create( + max_tokens=1024, messages=messages, model="model" + ) + assert len(events) > 0 tx = events[0] assert tx["type"] == "transaction" @@ -3779,32 +3794,23 @@ async def test_anthropic_message_truncation_async( chat_spans = [ span for span in tx.get("spans", []) if span.get("op") == OP.GEN_AI_CHAT ] - assert len(chat_spans) > 0 - chat_span = chat_spans[0] - if stream_gen_ai_spans: - assert 
chat_span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert chat_span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["attributes"] + assert len(chat_spans) > 0 + + chat_span = chat_spans[0] - messages_data = chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - else: assert chat_span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" assert chat_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["data"] messages_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert isinstance(messages_data, str) + assert isinstance(messages_data, str) - parsed_messages = json.loads(messages_data) - assert isinstance(parsed_messages, list) - assert len(parsed_messages) == 1 - assert "small message 5" in str(parsed_messages[0]) + parsed_messages = json.loads(messages_data) + assert isinstance(parsed_messages, list) + assert len(parsed_messages) == 1 + assert "small message 5" in str(parsed_messages[0]) - if stream_gen_ai_spans: - tx = next(item.payload for item in items if item.type == "transaction") - else: - pass assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5 @@ -3846,24 +3852,21 @@ def test_nonstreaming_create_message_with_system_prompt( if stream_gen_ai_spans: items = capture_items("transaction", "span") - else: - events = capture_events() - with start_transaction(name="anthropic"): - response = client.messages.create( - max_tokens=1024, - messages=messages, - model="model", - system="You are a helpful assistant.", - ) + with start_transaction(name="anthropic"): + response = client.messages.create( + max_tokens=1024, + messages=messages, + model="model", + system="You are a helpful assistant.", + ) - assert response == EXAMPLE_MESSAGE - usage = response.usage + assert response == EXAMPLE_MESSAGE + usage = response.usage - assert usage.input_tokens == 10 - assert usage.output_tokens == 20 + assert 
usage.input_tokens == 10 + assert usage.output_tokens == 20 - if stream_gen_ai_spans: (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" @@ -3909,6 +3912,22 @@ def test_nonstreaming_create_message_with_system_prompt( "end_turn" ] else: + events = capture_events() + + with start_transaction(name="anthropic"): + response = client.messages.create( + max_tokens=1024, + messages=messages, + model="model", + system="You are a helpful assistant.", + ) + + assert response == EXAMPLE_MESSAGE + usage = response.usage + + assert usage.input_tokens == 10 + assert usage.output_tokens == 20 + assert len(events) == 1 (event,) = events @@ -3933,16 +3952,8 @@ def test_nonstreaming_create_message_with_system_prompt( {"type": "text", "content": "You are a helpful assistant."} ] - if stream_gen_ai_spans: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] - stored_messages = json.loads( - span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - ) - else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] - stored_messages = json.loads( - span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - ) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] + stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(stored_messages) == 1 assert stored_messages[0]["role"] == "user" assert stored_messages[0]["content"] == "Hello, Claude" @@ -3998,24 +4009,21 @@ async def test_nonstreaming_create_message_with_system_prompt_async( if stream_gen_ai_spans: items = capture_items("transaction", "span") - else: - events = capture_events() - with start_transaction(name="anthropic"): - response = await client.messages.create( - max_tokens=1024, - messages=messages, - model="model", - system="You are a helpful assistant.", - ) + with start_transaction(name="anthropic"): + response = await client.messages.create( + max_tokens=1024, + messages=messages, + model="model", + system="You are a helpful 
assistant.", + ) - assert response == EXAMPLE_MESSAGE - usage = response.usage + assert response == EXAMPLE_MESSAGE + usage = response.usage - assert usage.input_tokens == 10 - assert usage.output_tokens == 20 + assert usage.input_tokens == 10 + assert usage.output_tokens == 20 - if stream_gen_ai_spans: (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" @@ -4061,6 +4069,22 @@ async def test_nonstreaming_create_message_with_system_prompt_async( "end_turn" ] else: + events = capture_events() + + with start_transaction(name="anthropic"): + response = await client.messages.create( + max_tokens=1024, + messages=messages, + model="model", + system="You are a helpful assistant.", + ) + + assert response == EXAMPLE_MESSAGE + usage = response.usage + + assert usage.input_tokens == 10 + assert usage.output_tokens == 20 + assert len(events) == 1 (event,) = events @@ -4566,24 +4590,21 @@ async def test_stream_message_with_system_prompt_async( if stream_gen_ai_spans: items = capture_items("transaction", "span") - else: - events = capture_events() - with mock.patch.object( - client._client, - "send", - return_value=response, - ) as _, start_transaction(name="anthropic"): - async with client.messages.stream( - max_tokens=1024, - messages=messages, - model="model", - system="You are a helpful assistant.", - ) as stream: - async for event in stream: - pass + with mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"): + async with client.messages.stream( + max_tokens=1024, + messages=messages, + model="model", + system="You are a helpful assistant.", + ) as stream: + async for event in stream: + pass - if stream_gen_ai_spans: (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" @@ -4626,6 +4647,22 @@ async def test_stream_message_with_system_prompt_async( assert 
span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True else: + events = capture_events() + + with mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"): + async with client.messages.stream( + max_tokens=1024, + messages=messages, + model="model", + system="You are a helpful assistant.", + ) as stream: + async for event in stream: + pass + assert len(events) == 1 (event,) = events @@ -4764,26 +4801,7 @@ async def test_streaming_create_message_with_system_prompt_async( async for _ in message: pass - else: - events = capture_events() - with mock.patch.object( - client._client, - "send", - return_value=response, - ) as _, start_transaction(name="anthropic"): - message = await client.messages.create( - max_tokens=1024, - messages=messages, - model="model", - stream=True, - system="You are a helpful assistant.", - ) - - async for _ in message: - pass - - if stream_gen_ai_spans: (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" @@ -4828,6 +4846,24 @@ async def test_streaming_create_message_with_system_prompt_async( assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True else: + events = capture_events() + + with mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"): + message = await client.messages.create( + max_tokens=1024, + messages=messages, + model="model", + stream=True, + system="You are a helpful assistant.", + ) + + async for _ in message: + pass + assert len(events) == 1 (event,) = events From 75f4d3aec9a9353135a5d3564b2503af518a6cc7 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 12 May 2026 14:23:32 +0200 Subject: [PATCH 47/84] cleanup google-genai --- .../google_genai/test_google_genai.py | 395 
++++++++++-------- 1 file changed, 219 insertions(+), 176 deletions(-) diff --git a/tests/integrations/google_genai/test_google_genai.py b/tests/integrations/google_genai/test_google_genai.py index 3cc4b42bb2..8da5e7ca22 100644 --- a/tests/integrations/google_genai/test_google_genai.py +++ b/tests/integrations/google_genai/test_google_genai.py @@ -222,10 +222,7 @@ def test_nonstreaming_generate_content( if send_default_pii and include_prompts: # Response text is stored as a JSON array - if stream_gen_ai_spans: - response_text = chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] - else: - response_text = chat_span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + response_text = chat_span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] # Parse the JSON array response_texts = json.loads(response_text) assert response_texts == ["Hello! How can I help you today?"] @@ -415,40 +412,52 @@ def get_weather(location: str) -> str: if stream_gen_ai_spans: items = capture_items("span") - else: - events = capture_events() - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ), start_transaction(name="google_genai"): - config = create_test_config(tools=[get_weather, mock_tool]) - mock_genai_client.models.generate_content( - model="gemini-1.5-flash", contents="What's the weather?", config=config - ) + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai"): + config = create_test_config(tools=[get_weather, mock_tool]) + mock_genai_client.models.generate_content( + model="gemini-1.5-flash", contents="What's the weather?", config=config + ) - if stream_gen_ai_spans: invoke_span = next(item.payload for item in items if item.type == "span") # Check that tools are recorded (data is serialized as a string) tools_data_str = invoke_span["attributes"][ SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS ] + + # Parse the JSON string to verify content + tools_data = 
json.loads(tools_data_str) + assert len(tools_data) == 2 + + # The order of tools may not be guaranteed, so sort by name and description for comparison + sorted_tools = sorted( + tools_data, key=lambda t: (t.get("name", ""), t.get("name", "")) + ) else: + events = capture_events() + + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai"): + config = create_test_config(tools=[get_weather, mock_tool]) + mock_genai_client.models.generate_content( + model="gemini-1.5-flash", contents="What's the weather?", config=config + ) + (event,) = events invoke_span = event["spans"][0] # Check that tools are recorded (data is serialized as a string) tools_data_str = invoke_span["data"][SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS] - # Parse the JSON string to verify content - tools_data = json.loads(tools_data_str) - assert len(tools_data) == 2 - # The order of tools may not be guaranteed, so sort by name and description for comparison - if stream_gen_ai_spans: - sorted_tools = sorted( - tools_data, key=lambda t: (t.get("name", ""), t.get("name", "")) - ) - else: + # Parse the JSON string to verify content + tools_data = json.loads(tools_data_str) + assert len(tools_data) == 2 + + # The order of tools may not be guaranteed, so sort by name and description for comparison sorted_tools = sorted( tools_data, key=lambda t: (t.get("name", ""), t.get("description", "")) ) @@ -488,16 +497,13 @@ def get_weather(location: str) -> str: if stream_gen_ai_spans: items = capture_items("span") - else: - events = capture_events() - # Execute the wrapped tool - with start_transaction(name="test_tool"): - result = wrapped_weather("San Francisco") + # Execute the wrapped tool + with start_transaction(name="test_tool"): + result = wrapped_weather("San Francisco") - assert result == "The weather in San Francisco is sunny" + assert result == "The weather in San Francisco is sunny" - if stream_gen_ai_spans: spans = 
[item.payload for item in items if item.type == "span"] assert len(spans) == 1 tool_span = next(item.payload for item in items if item.type == "span") @@ -510,6 +516,14 @@ def get_weather(location: str) -> str: == "Get the weather for a location" ) else: + events = capture_events() + + # Execute the wrapped tool + with start_transaction(name="test_tool"): + result = wrapped_weather("San Francisco") + + assert result == "The weather in San Francisco is sunny" + (event,) = events assert len(event["spans"]) == 1 tool_span = event["spans"][0] @@ -538,24 +552,35 @@ def test_error_handling( ) if stream_gen_ai_spans: items = capture_items("event", "transaction") - else: - events = capture_events() - # Mock an error at the HTTP level - with mock.patch.object( - mock_genai_client._api_client, "request", side_effect=Exception("API Error") - ), start_transaction(name="google_genai"), pytest.raises( - Exception, match="API Error" - ): - mock_genai_client.models.generate_content( - model="gemini-1.5-flash", - contents="This will fail", - config=create_test_config(), - ) + # Mock an error at the HTTP level + with mock.patch.object( + mock_genai_client._api_client, "request", side_effect=Exception("API Error") + ), start_transaction(name="google_genai"), pytest.raises( + Exception, match="API Error" + ): + mock_genai_client.models.generate_content( + model="gemini-1.5-flash", + contents="This will fail", + config=create_test_config(), + ) - if stream_gen_ai_spans: (error_event,) = (item.payload for item in items if item.type == "event") else: + events = capture_events() + + # Mock an error at the HTTP level + with mock.patch.object( + mock_genai_client._api_client, "request", side_effect=Exception("API Error") + ), start_transaction(name="google_genai"), pytest.raises( + Exception, match="API Error" + ): + mock_genai_client.models.generate_content( + model="gemini-1.5-flash", + contents="This will fail", + config=create_test_config(), + ) + # Should have both transaction and 
error events assert len(events) == 2 error_event, transaction_event = events @@ -646,58 +671,48 @@ def test_streaming_generate_content( if stream_gen_ai_spans: items = capture_items("span") - else: - events = capture_events() - with mock.patch.object( - mock_genai_client._api_client, "request_streamed", return_value=mock_stream - ), start_transaction(name="google_genai"): - config = create_test_config() - stream = mock_genai_client.models.generate_content_stream( - model="gemini-1.5-flash", contents="Stream me a response", config=config - ) + with mock.patch.object( + mock_genai_client._api_client, "request_streamed", return_value=mock_stream + ), start_transaction(name="google_genai"): + config = create_test_config() + stream = mock_genai_client.models.generate_content_stream( + model="gemini-1.5-flash", contents="Stream me a response", config=config + ) - # Consume the stream (this is what users do with the integration wrapper) - collected_chunks = list(stream) + # Consume the stream (this is what users do with the integration wrapper) + collected_chunks = list(stream) - # Verify we got all chunks - assert len(collected_chunks) == 3 - assert collected_chunks[0].candidates[0].content.parts[0].text == "Hello! " - assert collected_chunks[1].candidates[0].content.parts[0].text == "How can I " - assert collected_chunks[2].candidates[0].content.parts[0].text == "help you today?" + # Verify we got all chunks + assert len(collected_chunks) == 3 + assert collected_chunks[0].candidates[0].content.parts[0].text == "Hello! " + assert collected_chunks[1].candidates[0].content.parts[0].text == "How can I " + assert ( + collected_chunks[2].candidates[0].content.parts[0].text == "help you today?" 
+ ) - if stream_gen_ai_spans: spans = [item.payload for item in items if item.type == "span"] assert len(spans) == 1 chat_span = next(item.payload for item in items if item.type == "span") # Check that streaming flag is set on both spans assert chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - else: - (event,) = events - - assert len(event["spans"]) == 1 - chat_span = event["spans"][0] - - # Check that streaming flag is set on both spans - assert chat_span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - # Verify accumulated response text (all chunks combined) - expected_full_text = "Hello! How can I help you today?" - # Response text is stored as a JSON string - if stream_gen_ai_spans: - chat_response_text = json.loads( - chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] - ) - else: - chat_response_text = json.loads( - chat_span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] - ) - assert chat_response_text == [expected_full_text] + # Verify accumulated response text (all chunks combined) + expected_full_text = "Hello! How can I help you today?" 
+ # Response text is stored as a JSON string + if stream_gen_ai_spans: + chat_response_text = json.loads( + chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] + ) + else: + chat_response_text = json.loads( + chat_span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + ) + assert chat_response_text == [expected_full_text] - # Verify finish reasons (only the final chunk has a finish reason) - # When there's a single finish reason, it's stored as a plain string (not JSON) - if stream_gen_ai_spans: + # Verify finish reasons (only the final chunk has a finish reason) + # When there's a single finish reason, it's stored as a plain string (not JSON) assert SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS in chat_span["attributes"] assert ( chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == "STOP" @@ -715,6 +730,50 @@ def test_streaming_generate_content( chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "gemini-1.5-flash" ) else: + events = capture_events() + + with mock.patch.object( + mock_genai_client._api_client, "request_streamed", return_value=mock_stream + ), start_transaction(name="google_genai"): + config = create_test_config() + stream = mock_genai_client.models.generate_content_stream( + model="gemini-1.5-flash", contents="Stream me a response", config=config + ) + + # Consume the stream (this is what users do with the integration wrapper) + collected_chunks = list(stream) + + # Verify we got all chunks + assert len(collected_chunks) == 3 + assert collected_chunks[0].candidates[0].content.parts[0].text == "Hello! " + assert collected_chunks[1].candidates[0].content.parts[0].text == "How can I " + assert ( + collected_chunks[2].candidates[0].content.parts[0].text == "help you today?" 
+ ) + + (event,) = events + + assert len(event["spans"]) == 1 + chat_span = event["spans"][0] + + # Check that streaming flag is set on both spans + assert chat_span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + + # Verify accumulated response text (all chunks combined) + expected_full_text = "Hello! How can I help you today?" + # Response text is stored as a JSON string + if stream_gen_ai_spans: + chat_response_text = json.loads( + chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] + ) + else: + chat_response_text = json.loads( + chat_span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + ) + assert chat_response_text == [expected_full_text] + + # Verify finish reasons (only the final chunk has a finish reason) + # When there's a single finish reason, it's stored as a plain string (not JSON) assert SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS in chat_span["data"] assert chat_span["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == "STOP" assert chat_span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 @@ -745,18 +804,15 @@ def test_span_origin( if stream_gen_ai_spans: items = capture_items("span", "transaction") - else: - events = capture_events() - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ), start_transaction(name="google_genai"): - config = create_test_config() - mock_genai_client.models.generate_content( - model="gemini-1.5-flash", contents="Test origin", config=config - ) + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai"): + config = create_test_config() + mock_genai_client.models.generate_content( + model="gemini-1.5-flash", contents="Test origin", config=config + ) - if stream_gen_ai_spans: (event,) = (item.payload for item in items if item.type == "transaction") assert event["contexts"]["trace"]["origin"] == "manual" @@ -764,6 +820,16 @@ def test_span_origin( for span in spans: assert 
span["attributes"]["sentry.origin"] == "auto.ai.google_genai" else: + events = capture_events() + + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai"): + config = create_test_config() + mock_genai_client.models.generate_content( + model="gemini-1.5-flash", contents="Test origin", config=config + ) + (event,) = events assert event["contexts"]["trace"]["origin"] == "manual" @@ -893,6 +959,23 @@ def test_multiple_candidates( ) chat_span = next(item.payload for item in items if item.type == "span") + + # Should capture all responses + # Response text is stored as a JSON string when there are multiple responses + response_text = chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] + + if isinstance(response_text, str) and response_text.startswith("["): + # It's a JSON array + response_list = json.loads(response_text) + assert response_list == ["Response 1", "Response 2"] + else: + # It's concatenated + assert response_text == "Response 1\nResponse 2" + + # Finish reasons are serialized as JSON + finish_reasons = json.loads( + chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] + ) else: events = capture_events() @@ -907,26 +990,19 @@ def test_multiple_candidates( (event,) = events chat_span = event["spans"][0] - # Should capture all responses - # Response text is stored as a JSON string when there are multiple responses - if stream_gen_ai_spans: - response_text = chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] - else: + # Should capture all responses + # Response text is stored as a JSON string when there are multiple responses response_text = chat_span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] - if isinstance(response_text, str) and response_text.startswith("["): - # It's a JSON array - response_list = json.loads(response_text) - assert response_list == ["Response 1", "Response 2"] - else: - # It's concatenated - assert response_text == "Response 1\nResponse 
2" - # Finish reasons are serialized as JSON - if stream_gen_ai_spans: - finish_reasons = json.loads( - chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] - ) - else: + if isinstance(response_text, str) and response_text.startswith("["): + # It's a JSON array + response_list = json.loads(response_text) + assert response_list == ["Response 1", "Response 2"] + else: + # It's concatenated + assert response_text == "Response 1\nResponse 2" + + # Finish reasons are serialized as JSON finish_reasons = json.loads( chat_span["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] ) @@ -1482,34 +1558,20 @@ def test_embed_content( # Check input texts if PII is allowed if send_default_pii and include_prompts: - if stream_gen_ai_spans: - input_texts = json.loads( - embed_span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] - ) - else: - input_texts = json.loads( - embed_span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] - ) + input_texts = json.loads( + embed_span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + ) assert input_texts == [ "What is your name?", "What is your favorite color?", ] else: - if stream_gen_ai_spans: - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in embed_span["attributes"] - else: - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in embed_span["data"] + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in embed_span["attributes"] # Check usage data (sum of token counts from statistics: 10 + 15 = 25) # Note: Only available in newer versions with ContentEmbeddingStatistics - if stream_gen_ai_spans: - if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["attributes"]: - assert ( - embed_span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 25 - ) - else: - if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["data"]: - assert embed_span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 25 + if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["attributes"]: + assert embed_span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 25 else: events = capture_events() 
@@ -1546,34 +1608,20 @@ def test_embed_content( # Check input texts if PII is allowed if send_default_pii and include_prompts: - if stream_gen_ai_spans: - input_texts = json.loads( - embed_span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] - ) - else: - input_texts = json.loads( - embed_span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] - ) + input_texts = json.loads( + embed_span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + ) assert input_texts == [ "What is your name?", "What is your favorite color?", ] else: - if stream_gen_ai_spans: - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in embed_span["attributes"] - else: - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in embed_span["data"] + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in embed_span["data"] # Check usage data (sum of token counts from statistics: 10 + 15 = 25) # Note: Only available in newer versions with ContentEmbeddingStatistics - if stream_gen_ai_spans: - if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["attributes"]: - assert ( - embed_span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 25 - ) - else: - if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["data"]: - assert embed_span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 25 + if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["data"]: + assert embed_span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 25 @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @@ -1631,12 +1679,8 @@ def test_embed_content_string_input( assert input_texts == ["Single text input"] # Should use token_count from statistics (5), not billable_character_count (10) # Note: Only available in newer versions with ContentEmbeddingStatistics - if stream_gen_ai_spans: - if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["attributes"]: - assert embed_span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 5 - else: - if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["data"]: - assert embed_span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 5 + if 
SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["attributes"]: + assert embed_span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 5 else: events = capture_events() @@ -1657,12 +1701,8 @@ def test_embed_content_string_input( assert input_texts == ["Single text input"] # Should use token_count from statistics (5), not billable_character_count (10) # Note: Only available in newer versions with ContentEmbeddingStatistics - if stream_gen_ai_spans: - if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["attributes"]: - assert embed_span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 5 - else: - if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["data"]: - assert embed_span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 5 + if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["data"]: + assert embed_span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 5 @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @@ -2024,6 +2064,12 @@ async def test_async_embed_content_string_input( input_texts = json.loads( embed_span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] ) + + assert input_texts == ["Single text input"] + # Should use token_count from statistics (5), not billable_character_count (10) + # Note: Only available in newer versions with ContentEmbeddingStatistics + if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["attributes"]: + assert embed_span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 5 else: events = capture_events() @@ -2043,13 +2089,9 @@ async def test_async_embed_content_string_input( # Check that single string is handled correctly input_texts = json.loads(embed_span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) - assert input_texts == ["Single text input"] - # Should use token_count from statistics (5), not billable_character_count (10) - # Note: Only available in newer versions with ContentEmbeddingStatistics - if stream_gen_ai_spans: - if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["attributes"]: - assert 
embed_span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 5 - else: + assert input_texts == ["Single text input"] + # Should use token_count from statistics (5), not billable_character_count (10) + # Note: Only available in newer versions with ContentEmbeddingStatistics if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["data"]: assert embed_span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 5 @@ -2158,6 +2200,10 @@ async def test_async_embed_content_without_statistics( spans = [item.payload for item in items if item.type == "span"] (embed_span,) = spans + + # No usage tokens since there are no statistics in older versions + # This is expected and the integration should handle it gracefully + assert SPANDATA.GEN_AI_USAGE_INPUT_TOKENS not in embed_span["attributes"] else: events = capture_events() @@ -2174,11 +2220,8 @@ async def test_async_embed_content_without_statistics( (event,) = events (embed_span,) = event["spans"] - # No usage tokens since there are no statistics in older versions - # This is expected and the integration should handle it gracefully - if stream_gen_ai_spans: - assert SPANDATA.GEN_AI_USAGE_INPUT_TOKENS not in embed_span["attributes"] - else: + # No usage tokens since there are no statistics in older versions + # This is expected and the integration should handle it gracefully assert SPANDATA.GEN_AI_USAGE_INPUT_TOKENS not in embed_span["data"] From 8ba3d94dcbb050d5011a1d9b4b6173324ecde48b Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 12 May 2026 14:28:21 +0200 Subject: [PATCH 48/84] cleanup huggingface-hub --- .../huggingface_hub/test_huggingface_hub.py | 159 ++++++++---------- 1 file changed, 72 insertions(+), 87 deletions(-) diff --git a/tests/integrations/huggingface_hub/test_huggingface_hub.py b/tests/integrations/huggingface_hub/test_huggingface_hub.py index d691a58c31..2d94082e7b 100644 --- a/tests/integrations/huggingface_hub/test_huggingface_hub.py +++ 
b/tests/integrations/huggingface_hub/test_huggingface_hub.py @@ -651,34 +651,23 @@ def test_text_generation_streaming( assert span["name"] == "text_completion test-model" assert span["attributes"]["sentry.origin"] == "auto.ai.huggingface_hub" - if stream_gen_ai_spans: - expected_data = { - "gen_ai.operation.name": "text_completion", - "gen_ai.request.model": "test-model", - "gen_ai.response.finish_reasons": "length", - "gen_ai.response.streaming": True, - "gen_ai.usage.total_tokens": 10, - "sentry.environment": "production", - "sentry.op": "gen_ai.text_completion", - "sentry.origin": "auto.ai.huggingface_hub", - "sentry.release": mock.ANY, - "sentry.sdk.name": "sentry.python", - "sentry.sdk.version": mock.ANY, - "sentry.segment.id": mock.ANY, - "sentry.segment.name": "test", - "thread.id": mock.ANY, - "thread.name": mock.ANY, - } - else: - expected_data = { - "gen_ai.operation.name": "text_completion", - "gen_ai.request.model": "test-model", - "gen_ai.response.finish_reasons": "length", - "gen_ai.response.streaming": True, - "gen_ai.usage.total_tokens": 10, - "thread.id": mock.ANY, - "thread.name": mock.ANY, - } + expected_data = { + "gen_ai.operation.name": "text_completion", + "gen_ai.request.model": "test-model", + "gen_ai.response.finish_reasons": "length", + "gen_ai.response.streaming": True, + "gen_ai.usage.total_tokens": 10, + "sentry.environment": "production", + "sentry.op": "gen_ai.text_completion", + "sentry.origin": "auto.ai.huggingface_hub", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": mock.ANY, + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "test", + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } if send_default_pii and include_prompts: expected_data["gen_ai.request.messages"] = "Hello" @@ -721,34 +710,15 @@ def test_text_generation_streaming( assert span["description"] == "text_completion test-model" assert span["origin"] == "auto.ai.huggingface_hub" - if stream_gen_ai_spans: - 
expected_data = { - "gen_ai.operation.name": "text_completion", - "gen_ai.request.model": "test-model", - "gen_ai.response.finish_reasons": "length", - "gen_ai.response.streaming": True, - "gen_ai.usage.total_tokens": 10, - "sentry.environment": "production", - "sentry.op": "gen_ai.text_completion", - "sentry.origin": "auto.ai.huggingface_hub", - "sentry.release": mock.ANY, - "sentry.sdk.name": "sentry.python", - "sentry.sdk.version": mock.ANY, - "sentry.segment.id": mock.ANY, - "sentry.segment.name": "test", - "thread.id": mock.ANY, - "thread.name": mock.ANY, - } - else: - expected_data = { - "gen_ai.operation.name": "text_completion", - "gen_ai.request.model": "test-model", - "gen_ai.response.finish_reasons": "length", - "gen_ai.response.streaming": True, - "gen_ai.usage.total_tokens": 10, - "thread.id": mock.ANY, - "thread.name": mock.ANY, - } + expected_data = { + "gen_ai.operation.name": "text_completion", + "gen_ai.request.model": "test-model", + "gen_ai.response.finish_reasons": "length", + "gen_ai.response.streaming": True, + "gen_ai.usage.total_tokens": 10, + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } if send_default_pii and include_prompts: expected_data["gen_ai.request.messages"] = "Hello" @@ -783,20 +753,18 @@ def test_chat_completion( integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - if stream_gen_ai_spans: - items = capture_items("transaction", "span") - else: - events = capture_events() client = get_hf_provider_inference_client() - with sentry_sdk.start_transaction(name="test"): - client.chat_completion( - messages=[{"role": "user", "content": "Hello!"}], - stream=False, - ) - if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with sentry_sdk.start_transaction(name="test"): + client.chat_completion( + messages=[{"role": "user", "content": "Hello!"}], + stream=False, + ) + spans = [item.payload for item in items if item.type == 
"span"] span = None for sp in spans: @@ -849,6 +817,14 @@ def test_chat_completion( assert span["attributes"] == expected_data else: + events = capture_events() + + with sentry_sdk.start_transaction(name="test"): + client.chat_completion( + messages=[{"role": "user", "content": "Hello!"}], + stream=False, + ) + (transaction,) = events span = None @@ -863,14 +839,9 @@ def test_chat_completion( assert span is not None - if stream_gen_ai_spans: - assert span["attributes"]["sentry.op"] == "gen_ai.chat" - assert span["name"] == "chat test-model" - assert span["attributes"]["sentry.origin"] == "auto.ai.huggingface_hub" - else: - assert span["op"] == "gen_ai.chat" - assert span["description"] == "chat test-model" - assert span["origin"] == "auto.ai.huggingface_hub" + assert span["op"] == "gen_ai.chat" + assert span["description"] == "chat test-model" + assert span["origin"] == "auto.ai.huggingface_hub" expected_data = { "gen_ai.operation.name": "chat", @@ -1397,10 +1368,6 @@ def test_chat_completion_streaming_with_tools( integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - if stream_gen_ai_spans: - items = capture_items("transaction", "span") - else: - events = capture_events() client = get_hf_provider_inference_client() @@ -1419,17 +1386,21 @@ def test_chat_completion_streaming_with_tools( } ] - with sentry_sdk.start_transaction(name="test"): - _ = list( - client.chat_completion( - messages=[{"role": "user", "content": "What is the weather in Paris?"}], - stream=True, - tools=tools, - tool_choice="auto", + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with sentry_sdk.start_transaction(name="test"): + _ = list( + client.chat_completion( + messages=[ + {"role": "user", "content": "What is the weather in Paris?"} + ], + stream=True, + tools=tools, + tool_choice="auto", + ) ) - ) - if stream_gen_ai_spans: spans = [item.payload for item in items if item.type == 
"span"] span = None for sp in spans: @@ -1487,6 +1458,20 @@ def test_chat_completion_streaming_with_tools( assert span["attributes"] == expected_data else: + events = capture_events() + + with sentry_sdk.start_transaction(name="test"): + _ = list( + client.chat_completion( + messages=[ + {"role": "user", "content": "What is the weather in Paris?"} + ], + stream=True, + tools=tools, + tool_choice="auto", + ) + ) + (transaction,) = events span = None From f156e9295d45205bae45072015b1f2d82e68f70b Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 12 May 2026 14:31:12 +0200 Subject: [PATCH 49/84] cleanup langgraph --- .../integrations/langgraph/test_langgraph.py | 22 +++++++++---------- 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/tests/integrations/langgraph/test_langgraph.py b/tests/integrations/langgraph/test_langgraph.py index 0052fefa29..991c1f2269 100644 --- a/tests/integrations/langgraph/test_langgraph.py +++ b/tests/integrations/langgraph/test_langgraph.py @@ -171,12 +171,11 @@ def original_compile(self, *args, **kwargs): if stream_gen_ai_spans: items = capture_items("transaction", "span") - with patch("sentry_sdk.integrations.langgraph.StateGraph"): - with start_transaction(): - wrapped_compile = _wrap_state_graph_compile(original_compile) - compiled_graph = wrapped_compile( - graph, model="test-model", checkpointer=None - ) + with patch("sentry_sdk.integrations.langgraph.StateGraph"), start_transaction(): + wrapped_compile = _wrap_state_graph_compile(original_compile) + compiled_graph = wrapped_compile( + graph, model="test-model", checkpointer=None + ) assert compiled_graph is not None assert compiled_graph.name == "test_graph" @@ -209,12 +208,11 @@ def original_compile(self, *args, **kwargs): else: events = capture_events() - with patch("sentry_sdk.integrations.langgraph.StateGraph"): - with start_transaction(): - wrapped_compile = _wrap_state_graph_compile(original_compile) - compiled_graph = wrapped_compile( - graph, 
model="test-model", checkpointer=None - ) + with patch("sentry_sdk.integrations.langgraph.StateGraph"), start_transaction(): + wrapped_compile = _wrap_state_graph_compile(original_compile) + compiled_graph = wrapped_compile( + graph, model="test-model", checkpointer=None + ) assert compiled_graph is not None assert compiled_graph.name == "test_graph" From 3b03ddf4277132c2e963987f4ae073ae42ae32c9 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 12 May 2026 14:35:04 +0200 Subject: [PATCH 50/84] cleanup litellm --- tests/integrations/litellm/test_litellm.py | 75 +++++++++++++--------- 1 file changed, 46 insertions(+), 29 deletions(-) diff --git a/tests/integrations/litellm/test_litellm.py b/tests/integrations/litellm/test_litellm.py index ab60779ed6..8ae8dca99e 100644 --- a/tests/integrations/litellm/test_litellm.py +++ b/tests/integrations/litellm/test_litellm.py @@ -324,24 +324,21 @@ async def test_async_nonstreaming_chat_completion( if stream_gen_ai_spans: items = capture_items("transaction", "span") - else: - events = capture_events() - with mock.patch.object( - client.completions._client._client, - "send", - return_value=model_response, - ), start_transaction(name="litellm test"): - await litellm.acompletion( - model="gpt-3.5-turbo", - messages=messages, - client=client, - ) + with mock.patch.object( + client.completions._client._client, + "send", + return_value=model_response, + ), start_transaction(name="litellm test"): + await litellm.acompletion( + model="gpt-3.5-turbo", + messages=messages, + client=client, + ) - await GLOBAL_LOGGING_WORKER.flush() - await asyncio.sleep(0.5) + await GLOBAL_LOGGING_WORKER.flush() + await asyncio.sleep(0.5) - if stream_gen_ai_spans: (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "litellm test" @@ -373,6 +370,22 @@ async def test_async_nonstreaming_chat_completion( assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 assert 
span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 else: + events = capture_events() + + with mock.patch.object( + client.completions._client._client, + "send", + return_value=model_response, + ), start_transaction(name="litellm test"): + await litellm.acompletion( + model="gpt-3.5-turbo", + messages=messages, + client=client, + ) + + await GLOBAL_LOGGING_WORKER.flush() + await asyncio.sleep(0.5) + assert len(events) == 1 (event,) = events @@ -2385,6 +2398,13 @@ def test_litellm_message_truncation( assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["attributes"] messages_data = chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert isinstance(messages_data, str) + + parsed_messages = json.loads(messages_data) + assert isinstance(parsed_messages, list) + assert len(parsed_messages) == 1 + assert "small message 5" in str(parsed_messages[0]) + tx = next(item.payload for item in items if item.type == "transaction") else: events = capture_events() @@ -2415,16 +2435,12 @@ def test_litellm_message_truncation( assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["data"] messages_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert isinstance(messages_data, str) + assert isinstance(messages_data, str) - parsed_messages = json.loads(messages_data) - assert isinstance(parsed_messages, list) - assert len(parsed_messages) == 1 - assert "small message 5" in str(parsed_messages[0]) - if stream_gen_ai_spans: - tx = next(item.payload for item in items if item.type == "transaction") - else: - pass + parsed_messages = json.loads(messages_data) + assert isinstance(parsed_messages, list) + assert len(parsed_messages) == 1 + assert "small message 5" in str(parsed_messages[0]) assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5 @@ -2857,6 +2873,10 @@ async def test_async_binary_content_encoding_mixed_content( if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) 
+ + assert len(chat_spans) == 1 + span = chat_spans[0] + messages_data = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) else: events = capture_events() @@ -2882,11 +2902,8 @@ async def test_async_binary_content_encoding_mixed_content( if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" ) - assert len(chat_spans) == 1 - span = chat_spans[0] - if stream_gen_ai_spans: - messages_data = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) - else: + assert len(chat_spans) == 1 + span = chat_spans[0] messages_data = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) content_items = [ From 261b9f0c3db16e4c9371667e73d6d53a717b0dad Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 12 May 2026 14:41:54 +0200 Subject: [PATCH 51/84] cleanup openai --- tests/integrations/openai/test_openai.py | 1011 ++++++++++++++-------- 1 file changed, 670 insertions(+), 341 deletions(-) diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index c80b2df513..0da39e842d 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -3039,24 +3039,31 @@ def test_span_origin_streaming_chat( if stream_gen_ai_spans: items = capture_items("transaction", "span") - else: - events = capture_events() - client.chat.completions._post = mock.Mock(return_value=returned_stream) - with start_transaction(name="openai tx"): - response_stream = client.chat.completions.create( - model="some-model", messages=[{"role": "system", "content": "hello"}] - ) + client.chat.completions._post = mock.Mock(return_value=returned_stream) + with start_transaction(name="openai tx"): + response_stream = client.chat.completions.create( + model="some-model", messages=[{"role": "system", "content": "hello"}] + ) - "".join(map(lambda x: x.choices[0].delta.content, response_stream)) + "".join(map(lambda x: x.choices[0].delta.content, response_stream)) - if stream_gen_ai_spans: 
(event,) = (item.payload for item in items if item.type == "transaction") assert event["contexts"]["trace"]["origin"] == "manual" spans = [item.payload for item in items if item.type == "span"] assert spans[0]["attributes"]["sentry.origin"] == "auto.ai.openai" else: + events = capture_events() + + client.chat.completions._post = mock.Mock(return_value=returned_stream) + with start_transaction(name="openai tx"): + response_stream = client.chat.completions.create( + model="some-model", messages=[{"role": "system", "content": "hello"}] + ) + + "".join(map(lambda x: x.choices[0].delta.content, response_stream)) + (event,) = events assert event["contexts"]["trace"]["origin"] == "manual" @@ -3124,25 +3131,33 @@ async def test_span_origin_streaming_chat_async( if stream_gen_ai_spans: items = capture_items("transaction", "span") - else: - events = capture_events() - with start_transaction(name="openai tx"): - response_stream = await client.chat.completions.create( - model="some-model", messages=[{"role": "system", "content": "hello"}] - ) - async for _ in response_stream: - pass + with start_transaction(name="openai tx"): + response_stream = await client.chat.completions.create( + model="some-model", messages=[{"role": "system", "content": "hello"}] + ) + async for _ in response_stream: + pass - # "".join(map(lambda x: x.choices[0].delta.content, response_stream)) + # "".join(map(lambda x: x.choices[0].delta.content, response_stream)) - if stream_gen_ai_spans: (event,) = (item.payload for item in items if item.type == "transaction") assert event["contexts"]["trace"]["origin"] == "manual" spans = [item.payload for item in items if item.type == "span"] assert spans[0]["attributes"]["sentry.origin"] == "auto.ai.openai" else: + events = capture_events() + + with start_transaction(name="openai tx"): + response_stream = await client.chat.completions.create( + model="some-model", messages=[{"role": "system", "content": "hello"}] + ) + async for _ in response_stream: + pass + + # 
"".join(map(lambda x: x.choices[0].delta.content, response_stream)) + (event,) = events assert event["contexts"]["trace"]["origin"] == "manual" @@ -3178,19 +3193,21 @@ def test_span_origin_embeddings( if stream_gen_ai_spans: items = capture_items("transaction", "span") - else: - events = capture_events() - with start_transaction(name="openai tx"): - client.embeddings.create(input="hello", model="text-embedding-3-large") + with start_transaction(name="openai tx"): + client.embeddings.create(input="hello", model="text-embedding-3-large") - if stream_gen_ai_spans: (event,) = [item.payload for item in items if item.type == "transaction"] assert event["contexts"]["trace"]["origin"] == "manual" spans = [item.payload for item in items if item.type == "span"] assert spans[0]["attributes"]["sentry.origin"] == "auto.ai.openai" else: + events = capture_events() + + with start_transaction(name="openai tx"): + client.embeddings.create(input="hello", model="text-embedding-3-large") + (event,) = events assert event["contexts"]["trace"]["origin"] == "manual" @@ -3227,19 +3244,25 @@ async def test_span_origin_embeddings_async( if stream_gen_ai_spans: items = capture_items("transaction", "span") - else: - events = capture_events() - with start_transaction(name="openai tx"): - await client.embeddings.create(input="hello", model="text-embedding-3-large") + with start_transaction(name="openai tx"): + await client.embeddings.create( + input="hello", model="text-embedding-3-large" + ) - if stream_gen_ai_spans: (event,) = [item.payload for item in items if item.type == "transaction"] assert event["contexts"]["trace"]["origin"] == "manual" spans = [item.payload for item in items if item.type == "span"] assert spans[0]["attributes"]["sentry.origin"] == "auto.ai.openai" else: + events = capture_events() + + with start_transaction(name="openai tx"): + await client.embeddings.create( + input="hello", model="text-embedding-3-large" + ) + (event,) = events assert 
event["contexts"]["trace"]["origin"] == "manual" @@ -3840,6 +3863,162 @@ def test_ai_client_span_responses_api( "thread.id": mock.ANY, "thread.name": mock.ANY, } + + param_id = request.node.callspec.id + if "string" in param_id and ( + instructions is None or isinstance(instructions, Omit) + ): # type: ignore + expected_data.update( + { + "gen_ai.request.messages": safe_serialize( + ["How do I check if a Python object is an instance of a class?"] + ), + } + ) + elif "string" in param_id: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + } + ] + ), + "gen_ai.request.messages": safe_serialize( + ["How do I check if a Python object is an instance of a class?"] + ), + } + ) + elif "blocks_no_type" in param_id and ( + instructions is None or isinstance(instructions, Omit) + ): # type: ignore + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [{"type": "text", "content": "You are a helpful assistant."}] + ), + "gen_ai.request.messages": safe_serialize( + [{"role": "user", "content": "hello"}] + ), + } + ) + elif "blocks_no_type" in param_id: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"role": "user", "content": "hello"}] + ), + } + ) + elif "blocks" in param_id and ( + instructions is None or isinstance(instructions, Omit) + ): # type: ignore + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [{"type": "text", "content": "You are a helpful assistant."}] + ), + "gen_ai.request.messages": safe_serialize( + [{"type": "message", "role": "user", "content": "hello"}] + ), + } + ) + elif "blocks" in param_id: + expected_data.update( + { + 
"gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"type": "message", "role": "user", "content": "hello"}] + ), + } + ) + elif "parts_no_type" in param_id and ( + instructions is None or isinstance(instructions, Omit) + ): # type: ignore + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"role": "user", "content": "hello"}] + ), + } + ) + elif "parts_no_type" in param_id: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"role": "user", "content": "hello"}] + ), + } + ) + elif instructions is None or isinstance(instructions, Omit): # type: ignore + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"type": "message", "role": "user", "content": "hello"}] + ), + } + ) + else: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"type": "message", "role": "user", 
"content": "hello"}] + ), + } + ) + + assert spans[0]["attributes"] == expected_data else: events = capture_events() @@ -3879,163 +4058,160 @@ def test_ai_client_span_responses_api( "thread.name": mock.ANY, } - param_id = request.node.callspec.id - if "string" in param_id and ( - instructions is None or isinstance(instructions, Omit) - ): # type: ignore - expected_data.update( - { - "gen_ai.request.messages": safe_serialize( - ["How do I check if a Python object is an instance of a class?"] - ), - } - ) - elif "string" in param_id: - expected_data.update( - { - "gen_ai.system_instructions": safe_serialize( - [ - { - "type": "text", - "content": "You are a coding assistant that talks like a pirate.", - } - ] - ), - "gen_ai.request.messages": safe_serialize( - ["How do I check if a Python object is an instance of a class?"] - ), - } - ) - elif "blocks_no_type" in param_id and ( - instructions is None or isinstance(instructions, Omit) - ): # type: ignore - expected_data.update( - { - "gen_ai.system_instructions": safe_serialize( - [{"type": "text", "content": "You are a helpful assistant."}] - ), - "gen_ai.request.messages": safe_serialize( - [{"role": "user", "content": "hello"}] - ), - } - ) - elif "blocks_no_type" in param_id: - expected_data.update( - { - "gen_ai.system_instructions": safe_serialize( - [ - { - "type": "text", - "content": "You are a coding assistant that talks like a pirate.", - }, - {"type": "text", "content": "You are a helpful assistant."}, - ] - ), - "gen_ai.request.messages": safe_serialize( - [{"role": "user", "content": "hello"}] - ), - } - ) - elif "blocks" in param_id and ( - instructions is None or isinstance(instructions, Omit) - ): # type: ignore - expected_data.update( - { - "gen_ai.system_instructions": safe_serialize( - [{"type": "text", "content": "You are a helpful assistant."}] - ), - "gen_ai.request.messages": safe_serialize( - [{"type": "message", "role": "user", "content": "hello"}] - ), - } - ) - elif "blocks" in param_id: - 
expected_data.update( - { - "gen_ai.system_instructions": safe_serialize( - [ - { - "type": "text", - "content": "You are a coding assistant that talks like a pirate.", - }, - {"type": "text", "content": "You are a helpful assistant."}, - ] - ), - "gen_ai.request.messages": safe_serialize( - [{"type": "message", "role": "user", "content": "hello"}] - ), - } - ) - elif "parts_no_type" in param_id and ( - instructions is None or isinstance(instructions, Omit) - ): # type: ignore - expected_data.update( - { - "gen_ai.system_instructions": safe_serialize( - [ - {"type": "text", "content": "You are a helpful assistant."}, - {"type": "text", "content": "Be concise and clear."}, - ] - ), - "gen_ai.request.messages": safe_serialize( - [{"role": "user", "content": "hello"}] - ), - } - ) - elif "parts_no_type" in param_id: - expected_data.update( - { - "gen_ai.system_instructions": safe_serialize( - [ - { - "type": "text", - "content": "You are a coding assistant that talks like a pirate.", - }, - {"type": "text", "content": "You are a helpful assistant."}, - {"type": "text", "content": "Be concise and clear."}, - ] - ), - "gen_ai.request.messages": safe_serialize( - [{"role": "user", "content": "hello"}] - ), - } - ) - elif instructions is None or isinstance(instructions, Omit): # type: ignore - expected_data.update( - { - "gen_ai.system_instructions": safe_serialize( - [ - {"type": "text", "content": "You are a helpful assistant."}, - {"type": "text", "content": "Be concise and clear."}, - ] - ), - "gen_ai.request.messages": safe_serialize( - [{"type": "message", "role": "user", "content": "hello"}] - ), - } - ) - else: - expected_data.update( - { - "gen_ai.system_instructions": safe_serialize( - [ - { - "type": "text", - "content": "You are a coding assistant that talks like a pirate.", - }, - {"type": "text", "content": "You are a helpful assistant."}, - {"type": "text", "content": "Be concise and clear."}, - ] - ), - "gen_ai.request.messages": safe_serialize( - 
[{"type": "message", "role": "user", "content": "hello"}] - ), - } - ) + param_id = request.node.callspec.id + if "string" in param_id and ( + instructions is None or isinstance(instructions, Omit) + ): # type: ignore + expected_data.update( + { + "gen_ai.request.messages": safe_serialize( + ["How do I check if a Python object is an instance of a class?"] + ), + } + ) + elif "string" in param_id: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + } + ] + ), + "gen_ai.request.messages": safe_serialize( + ["How do I check if a Python object is an instance of a class?"] + ), + } + ) + elif "blocks_no_type" in param_id and ( + instructions is None or isinstance(instructions, Omit) + ): # type: ignore + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [{"type": "text", "content": "You are a helpful assistant."}] + ), + "gen_ai.request.messages": safe_serialize( + [{"role": "user", "content": "hello"}] + ), + } + ) + elif "blocks_no_type" in param_id: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"role": "user", "content": "hello"}] + ), + } + ) + elif "blocks" in param_id and ( + instructions is None or isinstance(instructions, Omit) + ): # type: ignore + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [{"type": "text", "content": "You are a helpful assistant."}] + ), + "gen_ai.request.messages": safe_serialize( + [{"type": "message", "role": "user", "content": "hello"}] + ), + } + ) + elif "blocks" in param_id: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding 
assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"type": "message", "role": "user", "content": "hello"}] + ), + } + ) + elif "parts_no_type" in param_id and ( + instructions is None or isinstance(instructions, Omit) + ): # type: ignore + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"role": "user", "content": "hello"}] + ), + } + ) + elif "parts_no_type" in param_id: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"role": "user", "content": "hello"}] + ), + } + ) + elif instructions is None or isinstance(instructions, Omit): # type: ignore + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"type": "message", "role": "user", "content": "hello"}] + ), + } + ) + else: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"type": "message", "role": "user", "content": "hello"}] + ), + } + ) - if stream_gen_ai_spans: - assert spans[0]["attributes"] == 
expected_data - else: assert spans[0]["data"] == expected_data @@ -4299,6 +4475,162 @@ async def test_ai_client_span_responses_async_api( "thread.id": mock.ANY, "thread.name": mock.ANY, } + + param_id = request.node.callspec.id + if "string" in param_id and ( + instructions is None or isinstance(instructions, Omit) + ): # type: ignore + expected_data.update( + { + "gen_ai.request.messages": safe_serialize( + ["How do I check if a Python object is an instance of a class?"] + ), + } + ) + elif "string" in param_id: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + } + ] + ), + "gen_ai.request.messages": safe_serialize( + ["How do I check if a Python object is an instance of a class?"] + ), + } + ) + elif "blocks_no_type" in param_id and ( + instructions is None or isinstance(instructions, Omit) + ): # type: ignore + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [{"type": "text", "content": "You are a helpful assistant."}] + ), + "gen_ai.request.messages": safe_serialize( + [{"role": "user", "content": "hello"}] + ), + } + ) + elif "blocks_no_type" in param_id: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"role": "user", "content": "hello"}] + ), + } + ) + elif "blocks" in param_id and ( + instructions is None or isinstance(instructions, Omit) + ): # type: ignore + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [{"type": "text", "content": "You are a helpful assistant."}] + ), + "gen_ai.request.messages": safe_serialize( + [{"type": "message", "role": "user", "content": "hello"}] + ), + } + ) + elif "blocks" in param_id: + 
expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"type": "message", "role": "user", "content": "hello"}] + ), + } + ) + elif "parts_no_type" in param_id and ( + instructions is None or isinstance(instructions, Omit) + ): # type: ignore + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"role": "user", "content": "hello"}] + ), + } + ) + elif "parts_no_type" in param_id: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"role": "user", "content": "hello"}] + ), + } + ) + elif instructions is None or isinstance(instructions, Omit): # type: ignore + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"type": "message", "role": "user", "content": "hello"}] + ), + } + ) + else: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + 
[{"type": "message", "role": "user", "content": "hello"}] + ), + } + ) + + assert spans[0]["attributes"] == expected_data else: events = capture_events() @@ -4339,163 +4671,160 @@ async def test_ai_client_span_responses_async_api( "thread.name": mock.ANY, } - param_id = request.node.callspec.id - if "string" in param_id and ( - instructions is None or isinstance(instructions, Omit) - ): # type: ignore - expected_data.update( - { - "gen_ai.request.messages": safe_serialize( - ["How do I check if a Python object is an instance of a class?"] - ), - } - ) - elif "string" in param_id: - expected_data.update( - { - "gen_ai.system_instructions": safe_serialize( - [ - { - "type": "text", - "content": "You are a coding assistant that talks like a pirate.", - } - ] - ), - "gen_ai.request.messages": safe_serialize( - ["How do I check if a Python object is an instance of a class?"] - ), - } - ) - elif "blocks_no_type" in param_id and ( - instructions is None or isinstance(instructions, Omit) - ): # type: ignore - expected_data.update( - { - "gen_ai.system_instructions": safe_serialize( - [{"type": "text", "content": "You are a helpful assistant."}] - ), - "gen_ai.request.messages": safe_serialize( - [{"role": "user", "content": "hello"}] - ), - } - ) - elif "blocks_no_type" in param_id: - expected_data.update( - { - "gen_ai.system_instructions": safe_serialize( - [ - { - "type": "text", - "content": "You are a coding assistant that talks like a pirate.", - }, - {"type": "text", "content": "You are a helpful assistant."}, - ] - ), - "gen_ai.request.messages": safe_serialize( - [{"role": "user", "content": "hello"}] - ), - } - ) - elif "blocks" in param_id and ( - instructions is None or isinstance(instructions, Omit) - ): # type: ignore - expected_data.update( - { - "gen_ai.system_instructions": safe_serialize( - [{"type": "text", "content": "You are a helpful assistant."}] - ), - "gen_ai.request.messages": safe_serialize( - [{"type": "message", "role": "user", "content": 
"hello"}] - ), - } - ) - elif "blocks" in param_id: - expected_data.update( - { - "gen_ai.system_instructions": safe_serialize( - [ - { - "type": "text", - "content": "You are a coding assistant that talks like a pirate.", - }, - {"type": "text", "content": "You are a helpful assistant."}, - ] - ), - "gen_ai.request.messages": safe_serialize( - [{"type": "message", "role": "user", "content": "hello"}] - ), - } - ) - elif "parts_no_type" in param_id and ( - instructions is None or isinstance(instructions, Omit) - ): # type: ignore - expected_data.update( - { - "gen_ai.system_instructions": safe_serialize( - [ - {"type": "text", "content": "You are a helpful assistant."}, - {"type": "text", "content": "Be concise and clear."}, - ] - ), - "gen_ai.request.messages": safe_serialize( - [{"role": "user", "content": "hello"}] - ), - } - ) - elif "parts_no_type" in param_id: - expected_data.update( - { - "gen_ai.system_instructions": safe_serialize( - [ - { - "type": "text", - "content": "You are a coding assistant that talks like a pirate.", - }, - {"type": "text", "content": "You are a helpful assistant."}, - {"type": "text", "content": "Be concise and clear."}, - ] - ), - "gen_ai.request.messages": safe_serialize( - [{"role": "user", "content": "hello"}] - ), - } - ) - elif instructions is None or isinstance(instructions, Omit): # type: ignore - expected_data.update( - { - "gen_ai.system_instructions": safe_serialize( - [ - {"type": "text", "content": "You are a helpful assistant."}, - {"type": "text", "content": "Be concise and clear."}, - ] - ), - "gen_ai.request.messages": safe_serialize( - [{"type": "message", "role": "user", "content": "hello"}] - ), - } - ) - else: - expected_data.update( - { - "gen_ai.system_instructions": safe_serialize( - [ - { - "type": "text", - "content": "You are a coding assistant that talks like a pirate.", - }, - {"type": "text", "content": "You are a helpful assistant."}, - {"type": "text", "content": "Be concise and clear."}, - ] - ), - 
"gen_ai.request.messages": safe_serialize( - [{"type": "message", "role": "user", "content": "hello"}] - ), - } - ) + param_id = request.node.callspec.id + if "string" in param_id and ( + instructions is None or isinstance(instructions, Omit) + ): # type: ignore + expected_data.update( + { + "gen_ai.request.messages": safe_serialize( + ["How do I check if a Python object is an instance of a class?"] + ), + } + ) + elif "string" in param_id: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + } + ] + ), + "gen_ai.request.messages": safe_serialize( + ["How do I check if a Python object is an instance of a class?"] + ), + } + ) + elif "blocks_no_type" in param_id and ( + instructions is None or isinstance(instructions, Omit) + ): # type: ignore + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [{"type": "text", "content": "You are a helpful assistant."}] + ), + "gen_ai.request.messages": safe_serialize( + [{"role": "user", "content": "hello"}] + ), + } + ) + elif "blocks_no_type" in param_id: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"role": "user", "content": "hello"}] + ), + } + ) + elif "blocks" in param_id and ( + instructions is None or isinstance(instructions, Omit) + ): # type: ignore + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [{"type": "text", "content": "You are a helpful assistant."}] + ), + "gen_ai.request.messages": safe_serialize( + [{"type": "message", "role": "user", "content": "hello"}] + ), + } + ) + elif "blocks" in param_id: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": 
"text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"type": "message", "role": "user", "content": "hello"}] + ), + } + ) + elif "parts_no_type" in param_id and ( + instructions is None or isinstance(instructions, Omit) + ): # type: ignore + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"role": "user", "content": "hello"}] + ), + } + ) + elif "parts_no_type" in param_id: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"role": "user", "content": "hello"}] + ), + } + ) + elif instructions is None or isinstance(instructions, Omit): # type: ignore + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"type": "message", "role": "user", "content": "hello"}] + ), + } + ) + else: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"type": "message", "role": "user", "content": "hello"}] + ), + } + ) - if stream_gen_ai_spans: - 
assert spans[0]["attributes"] == expected_data - else: assert spans[0]["data"] == expected_data From 4f8a4c80a2b612b9220ed399ab96da2ec22db096 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 12 May 2026 15:09:01 +0200 Subject: [PATCH 52/84] cleanup openai_agents --- .../openai_agents/test_openai_agents.py | 2474 +++++++++-------- 1 file changed, 1296 insertions(+), 1178 deletions(-) diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py index 46196893d8..60f88cd7f4 100644 --- a/tests/integrations/openai_agents/test_openai_agents.py +++ b/tests/integrations/openai_agents/test_openai_agents.py @@ -427,42 +427,10 @@ async def test_agent_invocation_span( invoke_agent_span, ai_client_span = spans assert invoke_agent_span["name"] == "invoke_agent test_agent" - else: - with patch.object( - agent.model._client._client, - "send", - return_value=response, - ) as _: - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, - ) - events = capture_events() - - result = await agents.Runner.run( - agent, - input, - run_config=test_run_config, - ) - - assert result is not None - assert result.final_output == "Hello, how can I help you?" - - (transaction,) = events - spans = transaction["spans"] - invoke_agent_span, ai_client_span = spans - assert transaction["transaction"] == "test_agent workflow" - assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" - - assert invoke_agent_span["description"] == "invoke_agent test_agent" - - # Only first case checks "gen_ai.request.messages" until further input handling work. - param_id = request.node.callspec.id - if "string" in param_id and instructions is None: # type: ignore - if stream_gen_ai_spans: + # Only first case checks "gen_ai.request.messages" until further input handling work. 
+ param_id = request.node.callspec.id + if "string" in param_id and instructions is None: # type: ignore assert "gen_ai.system_instructions" not in ai_client_span["attributes"] assert invoke_agent_span["attributes"][ @@ -475,34 +443,27 @@ async def test_agent_invocation_span( }, ] ) - else: - assert "gen_ai.system_instructions" not in ai_client_span["data"] - - assert invoke_agent_span["data"][ - "gen_ai.request.messages" + elif "string" in param_id: + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" ] == safe_serialize( [ { - "content": [{"text": "Test input", "type": "text"}], - "role": "user", + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", }, ] ) - - elif "string" in param_id: - if stream_gen_ai_spans: + elif "blocks_no_type" in param_id and instructions is None: # type: ignore assert ai_client_span["attributes"][ "gen_ai.system_instructions" ] == safe_serialize( [ - { - "type": "text", - "content": "You are a coding assistant that talks like a pirate.", - }, + {"type": "text", "content": "You are a helpful assistant."}, ] ) - else: - assert ai_client_span["data"][ + elif "blocks_no_type" in param_id: + assert ai_client_span["attributes"][ "gen_ai.system_instructions" ] == safe_serialize( [ @@ -510,10 +471,10 @@ async def test_agent_invocation_span( "type": "text", "content": "You are a coding assistant that talks like a pirate.", }, + {"type": "text", "content": "You are a helpful assistant."}, ] ) - elif "blocks_no_type" in param_id and instructions is None: # type: ignore - if stream_gen_ai_spans: + elif "blocks" in param_id and instructions is None: # type: ignore assert ai_client_span["attributes"][ "gen_ai.system_instructions" ] == safe_serialize( @@ -521,29 +482,29 @@ async def test_agent_invocation_span( {"type": "text", "content": "You are a helpful assistant."}, ] ) - else: - assert ai_client_span["data"][ + elif "blocks" in param_id: + assert ai_client_span["attributes"][ 
"gen_ai.system_instructions" ] == safe_serialize( [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, {"type": "text", "content": "You are a helpful assistant."}, ] ) - elif "blocks_no_type" in param_id: - if stream_gen_ai_spans: + elif "parts_no_type" in param_id and instructions is None: assert ai_client_span["attributes"][ "gen_ai.system_instructions" ] == safe_serialize( [ - { - "type": "text", - "content": "You are a coding assistant that talks like a pirate.", - }, {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, ] ) - else: - assert ai_client_span["data"][ + elif "parts_no_type" in param_id: + assert ai_client_span["attributes"][ "gen_ai.system_instructions" ] == safe_serialize( [ @@ -552,39 +513,105 @@ async def test_agent_invocation_span( "content": "You are a coding assistant that talks like a pirate.", }, {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, ] ) - elif "blocks" in param_id and instructions is None: # type: ignore - if stream_gen_ai_spans: + elif instructions is None: # type: ignore assert ai_client_span["attributes"][ "gen_ai.system_instructions" ] == safe_serialize( [ {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, ] ) else: - assert ai_client_span["data"][ + assert ai_client_span["attributes"][ "gen_ai.system_instructions" ] == safe_serialize( [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, ] ) - elif "blocks" in param_id: - if stream_gen_ai_spans: - assert ai_client_span["attributes"][ - "gen_ai.system_instructions" + + assert ( + invoke_agent_span["attributes"]["gen_ai.response.text"] + == "Hello, how can I help you?" 
+ ) + + assert ( + invoke_agent_span["attributes"]["gen_ai.operation.name"] == "invoke_agent" + ) + assert invoke_agent_span["attributes"]["gen_ai.system"] == "openai" + assert invoke_agent_span["attributes"]["gen_ai.agent.name"] == "test_agent" + assert invoke_agent_span["attributes"]["gen_ai.request.max_tokens"] == 100 + assert invoke_agent_span["attributes"]["gen_ai.request.model"] == "gpt-4" + assert invoke_agent_span["attributes"]["gen_ai.request.temperature"] == 0.7 + assert invoke_agent_span["attributes"]["gen_ai.request.top_p"] == 1.0 + + assert ai_client_span["name"] == "chat gpt-4" + assert ai_client_span["attributes"]["gen_ai.operation.name"] == "chat" + assert ai_client_span["attributes"]["gen_ai.system"] == "openai" + assert ai_client_span["attributes"]["gen_ai.agent.name"] == "test_agent" + assert ai_client_span["attributes"]["gen_ai.request.max_tokens"] == 100 + assert ai_client_span["attributes"]["gen_ai.request.model"] == "gpt-4" + assert ai_client_span["attributes"]["gen_ai.request.temperature"] == 0.7 + assert ai_client_span["attributes"]["gen_ai.request.top_p"] == 1.0 + + else: + with patch.object( + agent.model._client._client, + "send", + return_value=response, + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) + events = capture_events() + + result = await agents.Runner.run( + agent, + input, + run_config=test_run_config, + ) + + assert result is not None + assert result.final_output == "Hello, how can I help you?" 
+ + (transaction,) = events + spans = transaction["spans"] + invoke_agent_span, ai_client_span = spans + + assert transaction["transaction"] == "test_agent workflow" + assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" + + assert invoke_agent_span["description"] == "invoke_agent test_agent" + + # Only first case checks "gen_ai.request.messages" until further input handling work. + param_id = request.node.callspec.id + if "string" in param_id and instructions is None: # type: ignore + assert "gen_ai.system_instructions" not in ai_client_span["data"] + + assert invoke_agent_span["data"][ + "gen_ai.request.messages" ] == safe_serialize( [ { - "type": "text", - "content": "You are a coding assistant that talks like a pirate.", + "content": [{"text": "Test input", "type": "text"}], + "role": "user", }, - {"type": "text", "content": "You are a helpful assistant."}, ] ) - else: + + elif "string" in param_id: assert ai_client_span["data"][ "gen_ai.system_instructions" ] == safe_serialize( @@ -593,43 +620,37 @@ async def test_agent_invocation_span( "type": "text", "content": "You are a coding assistant that talks like a pirate.", }, - {"type": "text", "content": "You are a helpful assistant."}, ] ) - elif "parts_no_type" in param_id and instructions is None: - if stream_gen_ai_spans: - assert ai_client_span["attributes"][ + elif "blocks_no_type" in param_id and instructions is None: # type: ignore + assert ai_client_span["data"][ "gen_ai.system_instructions" ] == safe_serialize( [ {"type": "text", "content": "You are a helpful assistant."}, - {"type": "text", "content": "Be concise and clear."}, ] ) - else: + elif "blocks_no_type" in param_id: assert ai_client_span["data"][ "gen_ai.system_instructions" ] == safe_serialize( [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, {"type": "text", "content": "You are a helpful assistant."}, - {"type": "text", "content": "Be concise and clear."}, ] ) - elif 
"parts_no_type" in param_id: - if stream_gen_ai_spans: - assert ai_client_span["attributes"][ + elif "blocks" in param_id and instructions is None: # type: ignore + assert ai_client_span["data"][ "gen_ai.system_instructions" ] == safe_serialize( [ - { - "type": "text", - "content": "You are a coding assistant that talks like a pirate.", - }, {"type": "text", "content": "You are a helpful assistant."}, - {"type": "text", "content": "Be concise and clear."}, ] ) - else: + elif "blocks" in param_id: assert ai_client_span["data"][ "gen_ai.system_instructions" ] == safe_serialize( @@ -639,12 +660,10 @@ async def test_agent_invocation_span( "content": "You are a coding assistant that talks like a pirate.", }, {"type": "text", "content": "You are a helpful assistant."}, - {"type": "text", "content": "Be concise and clear."}, ] ) - elif instructions is None: # type: ignore - if stream_gen_ai_spans: - assert ai_client_span["attributes"][ + elif "parts_no_type" in param_id and instructions is None: + assert ai_client_span["data"][ "gen_ai.system_instructions" ] == safe_serialize( [ @@ -652,25 +671,24 @@ async def test_agent_invocation_span( {"type": "text", "content": "Be concise and clear."}, ] ) - else: + elif "parts_no_type" in param_id: assert ai_client_span["data"][ "gen_ai.system_instructions" ] == safe_serialize( [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, {"type": "text", "content": "You are a helpful assistant."}, {"type": "text", "content": "Be concise and clear."}, ] ) - else: - if stream_gen_ai_spans: - assert ai_client_span["attributes"][ + elif instructions is None: # type: ignore + assert ai_client_span["data"][ "gen_ai.system_instructions" ] == safe_serialize( [ - { - "type": "text", - "content": "You are a coding assistant that talks like a pirate.", - }, {"type": "text", "content": "You are a helpful assistant."}, {"type": "text", "content": "Be concise and clear."}, ] @@ -689,37 +707,11 @@ async def 
test_agent_invocation_span( ] ) - if stream_gen_ai_spans: - assert ( - invoke_agent_span["attributes"]["gen_ai.response.text"] - == "Hello, how can I help you?" - ) - else: assert ( invoke_agent_span["data"]["gen_ai.response.text"] == "Hello, how can I help you?" ) - if stream_gen_ai_spans: - assert ( - invoke_agent_span["attributes"]["gen_ai.operation.name"] == "invoke_agent" - ) - assert invoke_agent_span["attributes"]["gen_ai.system"] == "openai" - assert invoke_agent_span["attributes"]["gen_ai.agent.name"] == "test_agent" - assert invoke_agent_span["attributes"]["gen_ai.request.max_tokens"] == 100 - assert invoke_agent_span["attributes"]["gen_ai.request.model"] == "gpt-4" - assert invoke_agent_span["attributes"]["gen_ai.request.temperature"] == 0.7 - assert invoke_agent_span["attributes"]["gen_ai.request.top_p"] == 1.0 - - assert ai_client_span["name"] == "chat gpt-4" - assert ai_client_span["attributes"]["gen_ai.operation.name"] == "chat" - assert ai_client_span["attributes"]["gen_ai.system"] == "openai" - assert ai_client_span["attributes"]["gen_ai.agent.name"] == "test_agent" - assert ai_client_span["attributes"]["gen_ai.request.max_tokens"] == 100 - assert ai_client_span["attributes"]["gen_ai.request.model"] == "gpt-4" - assert ai_client_span["attributes"]["gen_ai.request.temperature"] == 0.7 - assert ai_client_span["attributes"]["gen_ai.request.top_p"] == 1.0 - else: assert invoke_agent_span["data"]["gen_ai.operation.name"] == "invoke_agent" assert invoke_agent_span["data"]["gen_ai.system"] == "openai" assert invoke_agent_span["data"]["gen_ai.agent.name"] == "test_agent" @@ -782,18 +774,13 @@ async def test_client_span_custom_model( assert result is not None assert result.final_output == "Hello, how can I help you?" 
- spans = [item.payload for item in items if item.type == "span"] - ai_client_span = next( - span - for span in spans - if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - ) + spans = [item.payload for item in items if item.type == "span"] + ai_client_span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) - assert ai_client_span["name"] == "chat my-custom-model" - assert ( - ai_client_span["attributes"]["gen_ai.request.model"] - == "my-custom-model" - ) + assert ai_client_span["name"] == "chat my-custom-model" + assert ai_client_span["attributes"]["gen_ai.request.model"] == "my-custom-model" else: with patch.object( agent.model._client._client, @@ -814,14 +801,12 @@ async def test_client_span_custom_model( assert result is not None assert result.final_output == "Hello, how can I help you?" - (transaction,) = events - spans = transaction["spans"] - ai_client_span = next( - span for span in spans if span["op"] == OP.GEN_AI_CHAT - ) + (transaction,) = events + spans = transaction["spans"] + ai_client_span = next(span for span in spans if span["op"] == OP.GEN_AI_CHAT) - assert ai_client_span["description"] == "chat my-custom-model" - assert ai_client_span["data"]["gen_ai.request.model"] == "my-custom-model" + assert ai_client_span["description"] == "chat my-custom-model" + assert ai_client_span["data"]["gen_ai.request.model"] == "my-custom-model" @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @@ -867,50 +852,44 @@ def test_agent_invocation_span_sync_no_pii( assert result is not None assert result.final_output == "Hello, how can I help you?" 
- (transaction,) = ( - item.payload for item in items if item.type == "transaction" - ) + (transaction,) = (item.payload for item in items if item.type == "transaction") - assert transaction["transaction"] == "test_agent workflow" - assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" + assert transaction["transaction"] == "test_agent workflow" + assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" - spans = [item.payload for item in items if item.type == "span"] - invoke_agent_span = next( - span - for span in spans - if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT - ) - ai_client_span = next( - span - for span in spans - if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - ) + spans = [item.payload for item in items if item.type == "span"] + invoke_agent_span = next( + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT + ) + ai_client_span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) - assert invoke_agent_span["name"] == "invoke_agent test_agent" - assert ( - invoke_agent_span["attributes"]["gen_ai.operation.name"] - == "invoke_agent" - ) - assert invoke_agent_span["attributes"]["gen_ai.system"] == "openai" - assert invoke_agent_span["attributes"]["gen_ai.agent.name"] == "test_agent" - assert invoke_agent_span["attributes"]["gen_ai.request.max_tokens"] == 100 - assert invoke_agent_span["attributes"]["gen_ai.request.model"] == "gpt-4" - assert invoke_agent_span["attributes"]["gen_ai.request.temperature"] == 0.7 - assert invoke_agent_span["attributes"]["gen_ai.request.top_p"] == 1.0 - - assert ai_client_span["name"] == "chat gpt-4" - assert ai_client_span["attributes"]["gen_ai.operation.name"] == "chat" - assert ai_client_span["attributes"]["gen_ai.system"] == "openai" - assert ai_client_span["attributes"]["gen_ai.agent.name"] == "test_agent" - assert ai_client_span["attributes"]["gen_ai.request.max_tokens"] == 100 - 
assert ai_client_span["attributes"]["gen_ai.request.model"] == "gpt-4" - assert ai_client_span["attributes"]["gen_ai.request.temperature"] == 0.7 - assert ai_client_span["attributes"]["gen_ai.request.top_p"] == 1.0 + assert invoke_agent_span["name"] == "invoke_agent test_agent" + assert ( + invoke_agent_span["attributes"]["gen_ai.operation.name"] == "invoke_agent" + ) + assert invoke_agent_span["attributes"]["gen_ai.system"] == "openai" + assert invoke_agent_span["attributes"]["gen_ai.agent.name"] == "test_agent" + assert invoke_agent_span["attributes"]["gen_ai.request.max_tokens"] == 100 + assert invoke_agent_span["attributes"]["gen_ai.request.model"] == "gpt-4" + assert invoke_agent_span["attributes"]["gen_ai.request.temperature"] == 0.7 + assert invoke_agent_span["attributes"]["gen_ai.request.top_p"] == 1.0 - assert ( - SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS - not in invoke_agent_span["attributes"] - ) + assert ai_client_span["name"] == "chat gpt-4" + assert ai_client_span["attributes"]["gen_ai.operation.name"] == "chat" + assert ai_client_span["attributes"]["gen_ai.system"] == "openai" + assert ai_client_span["attributes"]["gen_ai.agent.name"] == "test_agent" + assert ai_client_span["attributes"]["gen_ai.request.max_tokens"] == 100 + assert ai_client_span["attributes"]["gen_ai.request.model"] == "gpt-4" + assert ai_client_span["attributes"]["gen_ai.request.temperature"] == 0.7 + assert ai_client_span["attributes"]["gen_ai.request.top_p"] == 1.0 + + assert ( + SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in invoke_agent_span["attributes"] + ) else: with patch.object( agent.model._client._client, @@ -932,37 +911,35 @@ def test_agent_invocation_span_sync_no_pii( assert result is not None assert result.final_output == "Hello, how can I help you?" 
- (transaction,) = events - spans = transaction["spans"] - invoke_agent_span = next( - span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT - ) - ai_client_span = next( - span for span in spans if span["op"] == OP.GEN_AI_CHAT - ) + (transaction,) = events + spans = transaction["spans"] + invoke_agent_span = next( + span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT + ) + ai_client_span = next(span for span in spans if span["op"] == OP.GEN_AI_CHAT) - assert transaction["transaction"] == "test_agent workflow" - assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" + assert transaction["transaction"] == "test_agent workflow" + assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" - assert invoke_agent_span["description"] == "invoke_agent test_agent" - assert invoke_agent_span["data"]["gen_ai.operation.name"] == "invoke_agent" - assert invoke_agent_span["data"]["gen_ai.system"] == "openai" - assert invoke_agent_span["data"]["gen_ai.agent.name"] == "test_agent" - assert invoke_agent_span["data"]["gen_ai.request.max_tokens"] == 100 - assert invoke_agent_span["data"]["gen_ai.request.model"] == "gpt-4" - assert invoke_agent_span["data"]["gen_ai.request.temperature"] == 0.7 - assert invoke_agent_span["data"]["gen_ai.request.top_p"] == 1.0 + assert invoke_agent_span["description"] == "invoke_agent test_agent" + assert invoke_agent_span["data"]["gen_ai.operation.name"] == "invoke_agent" + assert invoke_agent_span["data"]["gen_ai.system"] == "openai" + assert invoke_agent_span["data"]["gen_ai.agent.name"] == "test_agent" + assert invoke_agent_span["data"]["gen_ai.request.max_tokens"] == 100 + assert invoke_agent_span["data"]["gen_ai.request.model"] == "gpt-4" + assert invoke_agent_span["data"]["gen_ai.request.temperature"] == 0.7 + assert invoke_agent_span["data"]["gen_ai.request.top_p"] == 1.0 - assert ai_client_span["description"] == "chat gpt-4" - assert ai_client_span["data"]["gen_ai.operation.name"] == 
"chat" - assert ai_client_span["data"]["gen_ai.system"] == "openai" - assert ai_client_span["data"]["gen_ai.agent.name"] == "test_agent" - assert ai_client_span["data"]["gen_ai.request.max_tokens"] == 100 - assert ai_client_span["data"]["gen_ai.request.model"] == "gpt-4" - assert ai_client_span["data"]["gen_ai.request.temperature"] == 0.7 - assert ai_client_span["data"]["gen_ai.request.top_p"] == 1.0 + assert ai_client_span["description"] == "chat gpt-4" + assert ai_client_span["data"]["gen_ai.operation.name"] == "chat" + assert ai_client_span["data"]["gen_ai.system"] == "openai" + assert ai_client_span["data"]["gen_ai.agent.name"] == "test_agent" + assert ai_client_span["data"]["gen_ai.request.max_tokens"] == 100 + assert ai_client_span["data"]["gen_ai.request.model"] == "gpt-4" + assert ai_client_span["data"]["gen_ai.request.temperature"] == 0.7 + assert ai_client_span["data"]["gen_ai.request.top_p"] == 1.0 - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in invoke_agent_span["data"] + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in invoke_agent_span["data"] @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @@ -1485,19 +1462,17 @@ async def test_handoff_span( assert result is not None - spans = [item.payload for item in items if item.type == "span"] - handoff_span = next( - span - for span in spans - if span["attributes"].get("sentry.op") == OP.GEN_AI_HANDOFF - ) + spans = [item.payload for item in items if item.type == "span"] + handoff_span = next( + span + for span in spans + if span["attributes"].get("sentry.op") == OP.GEN_AI_HANDOFF + ) - # Verify handoff span was created - assert handoff_span is not None - assert ( - handoff_span["name"] == "handoff from primary_agent to secondary_agent" - ) - assert handoff_span["attributes"]["gen_ai.operation.name"] == "handoff" + # Verify handoff span was created + assert handoff_span is not None + assert handoff_span["name"] == "handoff from primary_agent to secondary_agent" + assert 
handoff_span["attributes"]["gen_ai.operation.name"] == "handoff" else: with patch.object( primary_agent.model._client._client, @@ -1519,19 +1494,19 @@ async def test_handoff_span( assert result is not None - (transaction,) = events - spans = transaction["spans"] - handoff_span = next( - span for span in spans if span.get("op") == OP.GEN_AI_HANDOFF - ) + (transaction,) = events + spans = transaction["spans"] + handoff_span = next( + span for span in spans if span.get("op") == OP.GEN_AI_HANDOFF + ) - # Verify handoff span was created - assert handoff_span is not None - assert ( - handoff_span["description"] - == "handoff from primary_agent to secondary_agent" - ) - assert handoff_span["data"]["gen_ai.operation.name"] == "handoff" + # Verify handoff span was created + assert handoff_span is not None + assert ( + handoff_span["description"] + == "handoff from primary_agent to secondary_agent" + ) + assert handoff_span["data"]["gen_ai.operation.name"] == "handoff" @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @@ -1657,19 +1632,17 @@ async def test_max_turns_before_handoff_span( max_turns=1, ) - spans = [item.payload for item in items if item.type == "span"] - handoff_span = next( - span - for span in spans - if span["attributes"].get("sentry.op") == OP.GEN_AI_HANDOFF - ) + spans = [item.payload for item in items if item.type == "span"] + handoff_span = next( + span + for span in spans + if span["attributes"].get("sentry.op") == OP.GEN_AI_HANDOFF + ) - # Verify handoff span was created - assert handoff_span is not None - assert ( - handoff_span["name"] == "handoff from primary_agent to secondary_agent" - ) - assert handoff_span["attributes"]["gen_ai.operation.name"] == "handoff" + # Verify handoff span was created + assert handoff_span is not None + assert handoff_span["name"] == "handoff from primary_agent to secondary_agent" + assert handoff_span["attributes"]["gen_ai.operation.name"] == "handoff" else: with patch.object( 
primary_agent.model._client._client, @@ -1691,19 +1664,19 @@ async def test_max_turns_before_handoff_span( max_turns=1, ) - (error, transaction) = events - spans = transaction["spans"] - handoff_span = next( - span for span in spans if span.get("op") == OP.GEN_AI_HANDOFF - ) + (error, transaction) = events + spans = transaction["spans"] + handoff_span = next( + span for span in spans if span.get("op") == OP.GEN_AI_HANDOFF + ) - # Verify handoff span was created - assert handoff_span is not None - assert ( - handoff_span["description"] - == "handoff from primary_agent to secondary_agent" - ) - assert handoff_span["data"]["gen_ai.operation.name"] == "handoff" + # Verify handoff span was created + assert handoff_span is not None + assert ( + handoff_span["description"] + == "handoff from primary_agent to secondary_agent" + ) + assert handoff_span["data"]["gen_ai.operation.name"] == "handoff" @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @@ -1773,30 +1746,27 @@ def simple_test_tool(message: str) -> str: serialize_pydantic=True, ) - with patch.object( - agent_with_tool.model._client._client, - "send", - side_effect=[tool_response, final_response], - ) as _: - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, - ) + if stream_gen_ai_spans: + with patch.object( + agent_with_tool.model._client._client, + "send", + side_effect=[tool_response, final_response], + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) - if stream_gen_ai_spans: items = capture_items("transaction", "span") - else: - events = capture_events() - await agents.Runner.run( - agent_with_tool, - "Please use the simple test tool", - run_config=test_run_config, - ) + await agents.Runner.run( + agent_with_tool, + "Please use the simple test tool", 
+ run_config=test_run_config, + ) - if stream_gen_ai_spans: (transaction,) = (item.payload for item in items if item.type == "transaction") assert transaction["transaction"] == "test_agent workflow" assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" @@ -1815,56 +1785,45 @@ def simple_test_tool(message: str) -> str: for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_EXECUTE_TOOL ) - else: - (transaction,) = events - spans = transaction["spans"] - agent_span = next( - span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT - ) - ai_client_span1, ai_client_span2 = ( - span for span in spans if span["op"] == OP.GEN_AI_CHAT - ) - tool_span = next(span for span in spans if span["op"] == OP.GEN_AI_EXECUTE_TOOL) - - available_tool = { - "name": "simple_test_tool", - "description": "A simple tool", - "params_json_schema": { - "properties": {"message": {"title": "Message", "type": "string"}}, - "required": ["message"], - "title": "simple_test_tool_args", - "type": "object", - "additionalProperties": False, - }, - "on_invoke_tool": mock.ANY, - "strict_json_schema": True, - "is_enabled": True, - } - - if parse_version(OPENAI_AGENTS_VERSION) >= (0, 3, 3): - available_tool.update( - {"tool_input_guardrails": None, "tool_output_guardrails": None} - ) - if parse_version(OPENAI_AGENTS_VERSION) >= ( - 0, - 8, - ): - available_tool["needs_approval"] = False - if parse_version(OPENAI_AGENTS_VERSION) >= ( - 0, - 9, - 0, - ): - available_tool.update( - { - "timeout_seconds": None, - "timeout_behavior": "error_as_result", - "timeout_error_function": None, - } - ) + available_tool = { + "name": "simple_test_tool", + "description": "A simple tool", + "params_json_schema": { + "properties": {"message": {"title": "Message", "type": "string"}}, + "required": ["message"], + "title": "simple_test_tool_args", + "type": "object", + "additionalProperties": False, + }, + "on_invoke_tool": mock.ANY, + "strict_json_schema": True, + "is_enabled": True, + 
} + + if parse_version(OPENAI_AGENTS_VERSION) >= (0, 3, 3): + available_tool.update( + {"tool_input_guardrails": None, "tool_output_guardrails": None} + ) + + if parse_version(OPENAI_AGENTS_VERSION) >= ( + 0, + 8, + ): + available_tool["needs_approval"] = False + if parse_version(OPENAI_AGENTS_VERSION) >= ( + 0, + 9, + 0, + ): + available_tool.update( + { + "timeout_seconds": None, + "timeout_behavior": "error_as_result", + "timeout_error_function": None, + } + ) - if stream_gen_ai_spans: assert agent_span["name"] == "invoke_agent test_agent" assert agent_span["attributes"]["sentry.origin"] == "auto.ai.openai_agents" assert agent_span["attributes"]["gen_ai.agent.name"] == "test_agent" @@ -1873,21 +1832,9 @@ def simple_test_tool(message: str) -> str: agent_span_available_tool = json.loads( agent_span["attributes"]["gen_ai.request.available_tools"] )[0] - else: - assert transaction["transaction"] == "test_agent workflow" - assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" - - assert agent_span["description"] == "invoke_agent test_agent" - assert agent_span["origin"] == "auto.ai.openai_agents" - assert agent_span["data"]["gen_ai.agent.name"] == "test_agent" - assert agent_span["data"]["gen_ai.operation.name"] == "invoke_agent" - agent_span_available_tool = json.loads( - agent_span["data"]["gen_ai.request.available_tools"] - )[0] - assert all(agent_span_available_tool[k] == v for k, v in available_tool.items()) + assert all(agent_span_available_tool[k] == v for k, v in available_tool.items()) - if stream_gen_ai_spans: assert agent_span["attributes"]["gen_ai.request.max_tokens"] == 100 assert agent_span["attributes"]["gen_ai.request.model"] == "gpt-4" assert agent_span["attributes"]["gen_ai.request.temperature"] == 0.7 @@ -1902,26 +1849,11 @@ def simple_test_tool(message: str) -> str: ai_client_span1_available_tool = json.loads( ai_client_span1["attributes"]["gen_ai.request.available_tools"] )[0] - else: - assert 
agent_span["data"]["gen_ai.request.max_tokens"] == 100 - assert agent_span["data"]["gen_ai.request.model"] == "gpt-4" - assert agent_span["data"]["gen_ai.request.temperature"] == 0.7 - assert agent_span["data"]["gen_ai.request.top_p"] == 1.0 - assert agent_span["data"]["gen_ai.system"] == "openai" - - assert ai_client_span1["description"] == "chat gpt-4" - assert ai_client_span1["data"]["gen_ai.operation.name"] == "chat" - assert ai_client_span1["data"]["gen_ai.system"] == "openai" - assert ai_client_span1["data"]["gen_ai.agent.name"] == "test_agent" - ai_client_span1_available_tool = json.loads( - ai_client_span1["data"]["gen_ai.request.available_tools"] - )[0] - assert all( - ai_client_span1_available_tool[k] == v for k, v in available_tool.items() - ) + assert all( + ai_client_span1_available_tool[k] == v for k, v in available_tool.items() + ) - if stream_gen_ai_spans: assert ai_client_span1["attributes"]["gen_ai.request.max_tokens"] == 100 assert ai_client_span1["attributes"][ "gen_ai.request.messages" @@ -1945,49 +1877,23 @@ def simple_test_tool(message: str) -> str: ai_client_span1["attributes"]["gen_ai.usage.output_tokens.reasoning"] == 0 ) assert ai_client_span1["attributes"]["gen_ai.usage.total_tokens"] == 15 - else: - assert ai_client_span1["data"]["gen_ai.request.max_tokens"] == 100 - assert ai_client_span1["data"]["gen_ai.request.messages"] == safe_serialize( - [ - { - "role": "user", - "content": [ - {"type": "text", "text": "Please use the simple test tool"} - ], - }, - ] - ) - assert ai_client_span1["data"]["gen_ai.request.model"] == "gpt-4" - assert ai_client_span1["data"]["gen_ai.request.temperature"] == 0.7 - assert ai_client_span1["data"]["gen_ai.request.top_p"] == 1.0 - assert ai_client_span1["data"]["gen_ai.usage.input_tokens"] == 10 - assert ai_client_span1["data"]["gen_ai.usage.input_tokens.cached"] == 0 - assert ai_client_span1["data"]["gen_ai.usage.output_tokens"] == 5 - assert ai_client_span1["data"]["gen_ai.usage.output_tokens.reasoning"] 
== 0 - assert ai_client_span1["data"]["gen_ai.usage.total_tokens"] == 15 - tool_call = { - "arguments": '{"message": "hello"}', - "call_id": "call_123", - "name": "simple_test_tool", - "type": "function_call", - "id": "call_123", - "status": None, - } + tool_call = { + "arguments": '{"message": "hello"}', + "call_id": "call_123", + "name": "simple_test_tool", + "type": "function_call", + "id": "call_123", + "status": None, + } - if OPENAI_VERSION >= (2, 25, 0): - tool_call["namespace"] = None + if OPENAI_VERSION >= (2, 25, 0): + tool_call["namespace"] = None - if stream_gen_ai_spans: assert json.loads( ai_client_span1["attributes"]["gen_ai.response.tool_calls"] ) == [tool_call] - else: - assert json.loads(ai_client_span1["data"]["gen_ai.response.tool_calls"]) == [ - tool_call - ] - if stream_gen_ai_spans: assert tool_span["name"] == "execute_tool simple_test_tool" assert tool_span["attributes"]["gen_ai.agent.name"] == "test_agent" assert tool_span["attributes"]["gen_ai.operation.name"] == "execute_tool" @@ -1995,17 +1901,9 @@ def simple_test_tool(message: str) -> str: tool_span_available_tool = json.loads( tool_span["attributes"]["gen_ai.request.available_tools"] )[0] - else: - assert tool_span["description"] == "execute_tool simple_test_tool" - assert tool_span["data"]["gen_ai.agent.name"] == "test_agent" - assert tool_span["data"]["gen_ai.operation.name"] == "execute_tool" - tool_span_available_tool = json.loads( - tool_span["data"]["gen_ai.request.available_tools"] - )[0] - assert all(tool_span_available_tool[k] == v for k, v in available_tool.items()) + assert all(tool_span_available_tool[k] == v for k, v in available_tool.items()) - if stream_gen_ai_spans: assert tool_span["attributes"]["gen_ai.request.max_tokens"] == 100 assert tool_span["attributes"]["gen_ai.request.model"] == "gpt-4" assert tool_span["attributes"]["gen_ai.request.temperature"] == 0.7 @@ -2021,10 +1919,192 @@ def simple_test_tool(message: str) -> str: assert 
ai_client_span2["attributes"]["gen_ai.agent.name"] == "test_agent" assert ai_client_span2["attributes"]["gen_ai.operation.name"] == "chat" - ai_client_span2_available_tool = json.loads( - ai_client_span2["attributes"]["gen_ai.request.available_tools"] + ai_client_span2_available_tool = json.loads( + ai_client_span2["attributes"]["gen_ai.request.available_tools"] + )[0] + + assert all( + ai_client_span2_available_tool[k] == v for k, v in available_tool.items() + ) + + assert ai_client_span2["attributes"]["gen_ai.request.max_tokens"] == 100 + assert ai_client_span2["attributes"][ + "gen_ai.request.messages" + ] == safe_serialize( + [ + { + "role": "tool", + "content": [ + { + "call_id": "call_123", + "output": "Tool executed with: hello", + "type": "function_call_output", + } + ], + }, + ] + ) + assert ai_client_span2["attributes"]["gen_ai.request.model"] == "gpt-4" + assert ai_client_span2["attributes"]["gen_ai.request.temperature"] == 0.7 + assert ai_client_span2["attributes"]["gen_ai.request.top_p"] == 1.0 + assert ( + ai_client_span2["attributes"]["gen_ai.response.text"] + == "Task completed using the tool" + ) + assert ai_client_span2["attributes"]["gen_ai.system"] == "openai" + assert ai_client_span2["attributes"]["gen_ai.usage.input_tokens.cached"] == 0 + assert ai_client_span2["attributes"]["gen_ai.usage.input_tokens"] == 15 + assert ( + ai_client_span2["attributes"]["gen_ai.usage.output_tokens.reasoning"] == 0 + ) + assert ai_client_span2["attributes"]["gen_ai.usage.output_tokens"] == 10 + assert ai_client_span2["attributes"]["gen_ai.usage.total_tokens"] == 25 + + else: + with patch.object( + agent_with_tool.model._client._client, + "send", + side_effect=[tool_response, final_response], + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) + + events = capture_events() + + await agents.Runner.run( + agent_with_tool, + "Please use 
the simple test tool", + run_config=test_run_config, + ) + + (transaction,) = events + spans = transaction["spans"] + agent_span = next( + span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT + ) + ai_client_span1, ai_client_span2 = ( + span for span in spans if span["op"] == OP.GEN_AI_CHAT + ) + tool_span = next(span for span in spans if span["op"] == OP.GEN_AI_EXECUTE_TOOL) + + available_tool = { + "name": "simple_test_tool", + "description": "A simple tool", + "params_json_schema": { + "properties": {"message": {"title": "Message", "type": "string"}}, + "required": ["message"], + "title": "simple_test_tool_args", + "type": "object", + "additionalProperties": False, + }, + "on_invoke_tool": mock.ANY, + "strict_json_schema": True, + "is_enabled": True, + } + + if parse_version(OPENAI_AGENTS_VERSION) >= (0, 3, 3): + available_tool.update( + {"tool_input_guardrails": None, "tool_output_guardrails": None} + ) + + if parse_version(OPENAI_AGENTS_VERSION) >= ( + 0, + 8, + ): + available_tool["needs_approval"] = False + if parse_version(OPENAI_AGENTS_VERSION) >= ( + 0, + 9, + 0, + ): + available_tool.update( + { + "timeout_seconds": None, + "timeout_behavior": "error_as_result", + "timeout_error_function": None, + } + ) + + assert transaction["transaction"] == "test_agent workflow" + assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" + + assert agent_span["description"] == "invoke_agent test_agent" + assert agent_span["origin"] == "auto.ai.openai_agents" + assert agent_span["data"]["gen_ai.agent.name"] == "test_agent" + assert agent_span["data"]["gen_ai.operation.name"] == "invoke_agent" + + agent_span_available_tool = json.loads( + agent_span["data"]["gen_ai.request.available_tools"] + )[0] + assert all(agent_span_available_tool[k] == v for k, v in available_tool.items()) + + assert agent_span["data"]["gen_ai.request.max_tokens"] == 100 + assert agent_span["data"]["gen_ai.request.model"] == "gpt-4" + assert 
agent_span["data"]["gen_ai.request.temperature"] == 0.7 + assert agent_span["data"]["gen_ai.request.top_p"] == 1.0 + assert agent_span["data"]["gen_ai.system"] == "openai" + + assert ai_client_span1["description"] == "chat gpt-4" + assert ai_client_span1["data"]["gen_ai.operation.name"] == "chat" + assert ai_client_span1["data"]["gen_ai.system"] == "openai" + assert ai_client_span1["data"]["gen_ai.agent.name"] == "test_agent" + + ai_client_span1_available_tool = json.loads( + ai_client_span1["data"]["gen_ai.request.available_tools"] + )[0] + assert all( + ai_client_span1_available_tool[k] == v for k, v in available_tool.items() + ) + + assert ai_client_span1["data"]["gen_ai.request.max_tokens"] == 100 + assert ai_client_span1["data"]["gen_ai.request.messages"] == safe_serialize( + [ + { + "role": "user", + "content": [ + {"type": "text", "text": "Please use the simple test tool"} + ], + }, + ] + ) + assert ai_client_span1["data"]["gen_ai.request.model"] == "gpt-4" + assert ai_client_span1["data"]["gen_ai.request.temperature"] == 0.7 + assert ai_client_span1["data"]["gen_ai.request.top_p"] == 1.0 + assert ai_client_span1["data"]["gen_ai.usage.input_tokens"] == 10 + assert ai_client_span1["data"]["gen_ai.usage.input_tokens.cached"] == 0 + assert ai_client_span1["data"]["gen_ai.usage.output_tokens"] == 5 + assert ai_client_span1["data"]["gen_ai.usage.output_tokens.reasoning"] == 0 + assert ai_client_span1["data"]["gen_ai.usage.total_tokens"] == 15 + + tool_call = { + "arguments": '{"message": "hello"}', + "call_id": "call_123", + "name": "simple_test_tool", + "type": "function_call", + "id": "call_123", + "status": None, + } + + if OPENAI_VERSION >= (2, 25, 0): + tool_call["namespace"] = None + + assert json.loads(ai_client_span1["data"]["gen_ai.response.tool_calls"]) == [ + tool_call + ] + + assert tool_span["description"] == "execute_tool simple_test_tool" + assert tool_span["data"]["gen_ai.agent.name"] == "test_agent" + assert 
tool_span["data"]["gen_ai.operation.name"] == "execute_tool" + + tool_span_available_tool = json.loads( + tool_span["data"]["gen_ai.request.available_tools"] )[0] - else: + assert all(tool_span_available_tool[k] == v for k, v in available_tool.items()) + assert tool_span["data"]["gen_ai.request.max_tokens"] == 100 assert tool_span["data"]["gen_ai.request.model"] == "gpt-4" assert tool_span["data"]["gen_ai.request.temperature"] == 0.7 @@ -2041,44 +2121,10 @@ def simple_test_tool(message: str) -> str: ai_client_span2_available_tool = json.loads( ai_client_span2["data"]["gen_ai.request.available_tools"] )[0] - assert all( - ai_client_span2_available_tool[k] == v for k, v in available_tool.items() - ) - - if stream_gen_ai_spans: - assert ai_client_span2["attributes"]["gen_ai.request.max_tokens"] == 100 - assert ai_client_span2["attributes"][ - "gen_ai.request.messages" - ] == safe_serialize( - [ - { - "role": "tool", - "content": [ - { - "call_id": "call_123", - "output": "Tool executed with: hello", - "type": "function_call_output", - } - ], - }, - ] - ) - assert ai_client_span2["attributes"]["gen_ai.request.model"] == "gpt-4" - assert ai_client_span2["attributes"]["gen_ai.request.temperature"] == 0.7 - assert ai_client_span2["attributes"]["gen_ai.request.top_p"] == 1.0 - assert ( - ai_client_span2["attributes"]["gen_ai.response.text"] - == "Task completed using the tool" - ) - assert ai_client_span2["attributes"]["gen_ai.system"] == "openai" - assert ai_client_span2["attributes"]["gen_ai.usage.input_tokens.cached"] == 0 - assert ai_client_span2["attributes"]["gen_ai.usage.input_tokens"] == 15 - assert ( - ai_client_span2["attributes"]["gen_ai.usage.output_tokens.reasoning"] == 0 + assert all( + ai_client_span2_available_tool[k] == v for k, v in available_tool.items() ) - assert ai_client_span2["attributes"]["gen_ai.usage.output_tokens"] == 10 - assert ai_client_span2["attributes"]["gen_ai.usage.total_tokens"] == 25 - else: + assert 
ai_client_span2["data"]["gen_ai.request.max_tokens"] == 100 assert ai_client_span2["data"]["gen_ai.request.messages"] == safe_serialize( [ @@ -2386,117 +2432,113 @@ def simple_test_tool(message: str) -> str: agent_with_tool = test_agent.clone(tools=[simple_test_tool]) if stream_gen_ai_spans: - with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}): - with patch( - "agents.models.openai_responses.OpenAIResponsesModel.get_response" - ) as mock_get_response: - # Create a mock response that includes tool calls - tool_call = ResponseFunctionToolCall( - id="call_123", - call_id="call_123", - name="wrong_tool", - type="function_call", - arguments='{"message": "hello"}', - ) + with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}), patch( + "agents.models.openai_responses.OpenAIResponsesModel.get_response" + ) as mock_get_response: + # Create a mock response that includes tool calls + tool_call = ResponseFunctionToolCall( + id="call_123", + call_id="call_123", + name="wrong_tool", + type="function_call", + arguments='{"message": "hello"}', + ) + + tool_response = ModelResponse( + output=[tool_call], + usage=Usage( + requests=1, input_tokens=10, output_tokens=5, total_tokens=15 + ), + response_id="resp_tool_123", + ) - tool_response = ModelResponse( - output=[tool_call], - usage=Usage( - requests=1, input_tokens=10, output_tokens=5, total_tokens=15 - ), - response_id="resp_tool_123", - ) + mock_get_response.side_effect = [tool_response] - mock_get_response.side_effect = [tool_response] + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, - ) + items = capture_items("span", "transaction") - items = capture_items("span", "transaction") + with pytest.raises(ModelBehaviorError): + 
await agents.Runner.run( + agent_with_tool, + "Please use the simple test tool", + run_config=test_run_config, + ) - with pytest.raises(ModelBehaviorError): - await agents.Runner.run( - agent_with_tool, - "Please use the simple test tool", - run_config=test_run_config, - ) + (transaction,) = (item.payload for item in items if item.type == "transaction") + assert transaction["transaction"] == "test_agent workflow" + assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" - (transaction,) = ( - item.payload for item in items if item.type == "transaction" - ) - assert transaction["transaction"] == "test_agent workflow" - assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" + spans = [item.payload for item in items if item.type == "span"] - spans = [item.payload for item in items if item.type == "span"] + ( + agent_span, + ai_client_span1, + ) = spans + assert agent_span["name"] == "invoke_agent test_agent" + assert agent_span["attributes"]["sentry.origin"] == "auto.ai.openai_agents" - ( - agent_span, - ai_client_span1, - ) = spans - assert agent_span["name"] == "invoke_agent test_agent" - assert agent_span["attributes"]["sentry.origin"] == "auto.ai.openai_agents" - - # Error due to unrecognized tool in model response. - assert agent_span["status"] == "error" + # Error due to unrecognized tool in model response. 
+ assert agent_span["status"] == "error" else: - with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}): - with patch( - "agents.models.openai_responses.OpenAIResponsesModel.get_response" - ) as mock_get_response: - # Create a mock response that includes tool calls - tool_call = ResponseFunctionToolCall( - id="call_123", - call_id="call_123", - name="wrong_tool", - type="function_call", - arguments='{"message": "hello"}', - ) + with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}), patch( + "agents.models.openai_responses.OpenAIResponsesModel.get_response" + ) as mock_get_response: + # Create a mock response that includes tool calls + tool_call = ResponseFunctionToolCall( + id="call_123", + call_id="call_123", + name="wrong_tool", + type="function_call", + arguments='{"message": "hello"}', + ) + + tool_response = ModelResponse( + output=[tool_call], + usage=Usage( + requests=1, input_tokens=10, output_tokens=5, total_tokens=15 + ), + response_id="resp_tool_123", + ) - tool_response = ModelResponse( - output=[tool_call], - usage=Usage( - requests=1, input_tokens=10, output_tokens=5, total_tokens=15 - ), - response_id="resp_tool_123", - ) + mock_get_response.side_effect = [tool_response] - mock_get_response.side_effect = [tool_response] + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) + events = capture_events() - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + with pytest.raises(ModelBehaviorError): + await agents.Runner.run( + agent_with_tool, + "Please use the simple test tool", + run_config=test_run_config, ) - events = capture_events() - with pytest.raises(ModelBehaviorError): - await agents.Runner.run( - agent_with_tool, - "Please use the simple test tool", - run_config=test_run_config, - ) - - (error, 
transaction) = events - spans = transaction["spans"] - ( - agent_span, - ai_client_span1, - ) = spans + (error, transaction) = events + spans = transaction["spans"] + ( + agent_span, + ai_client_span1, + ) = spans - assert transaction["transaction"] == "test_agent workflow" - assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" + assert transaction["transaction"] == "test_agent workflow" + assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" - assert agent_span["description"] == "invoke_agent test_agent" - assert agent_span["origin"] == "auto.ai.openai_agents" + assert agent_span["description"] == "invoke_agent test_agent" + assert agent_span["origin"] == "auto.ai.openai_agents" - # Error due to unrecognized tool in model response. - assert agent_span["status"] == "internal_error" - assert agent_span["tags"]["status"] == "internal_error" + # Error due to unrecognized tool in model response. + assert agent_span["status"] == "internal_error" + assert agent_span["tags"]["status"] == "internal_error" @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @@ -2512,8 +2554,8 @@ async def test_error_handling( Test error handling in agent execution. 
""" - with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}): - with patch( + if stream_gen_ai_spans: + with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}), patch( "agents.models.openai_responses.OpenAIResponsesModel.get_response" ) as mock_get_response: mock_get_response.side_effect = Exception("Model Error") @@ -2527,86 +2569,84 @@ async def test_error_handling( _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - if stream_gen_ai_spans: - items = capture_items("event", "span", "transaction") + items = capture_items("event", "span", "transaction") - with pytest.raises(Exception, match="Model Error"): - await agents.Runner.run( - test_agent, "Test input", run_config=test_run_config - ) - - (error_event,) = ( - item.payload for item in items if item.type == "event" + with pytest.raises(Exception, match="Model Error"): + await agents.Runner.run( + test_agent, "Test input", run_config=test_run_config ) - assert error_event["exception"]["values"][0]["type"] == "Exception" - assert error_event["exception"]["values"][0]["value"] == "Model Error" - assert ( - error_event["exception"]["values"][0]["mechanism"]["type"] - == "openai_agents" - ) + (error_event,) = (item.payload for item in items if item.type == "event") - (transaction,) = ( - item.payload for item in items if item.type == "transaction" - ) + assert error_event["exception"]["values"][0]["type"] == "Exception" + assert error_event["exception"]["values"][0]["value"] == "Model Error" + assert ( + error_event["exception"]["values"][0]["mechanism"]["type"] + == "openai_agents" + ) - assert transaction["transaction"] == "test_agent workflow" - assert ( - transaction["contexts"]["trace"]["origin"] - == "auto.ai.openai_agents" - ) + (transaction,) = (item.payload for item in items if item.type == "transaction") - spans = [item.payload for item in items if item.type == "span"] - (invoke_agent_span, ai_client_span) = spans + assert transaction["transaction"] == "test_agent workflow" + assert 
transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" - assert invoke_agent_span["name"] == "invoke_agent test_agent" - assert ( - invoke_agent_span["attributes"]["sentry.origin"] - == "auto.ai.openai_agents" - ) + spans = [item.payload for item in items if item.type == "span"] + (invoke_agent_span, ai_client_span) = spans - assert ai_client_span["name"] == "chat gpt-4" - assert ( - ai_client_span["attributes"]["sentry.origin"] - == "auto.ai.openai_agents" - ) - assert ai_client_span["status"] == "error" - else: - events = capture_events() - - with pytest.raises(Exception, match="Model Error"): - await agents.Runner.run( - test_agent, "Test input", run_config=test_run_config - ) - - ( - error_event, - transaction, - ) = events - - assert error_event["exception"]["values"][0]["type"] == "Exception" - assert error_event["exception"]["values"][0]["value"] == "Model Error" - assert ( - error_event["exception"]["values"][0]["mechanism"]["type"] - == "openai_agents" - ) + assert invoke_agent_span["name"] == "invoke_agent test_agent" + assert ( + invoke_agent_span["attributes"]["sentry.origin"] == "auto.ai.openai_agents" + ) - spans = transaction["spans"] - (invoke_agent_span, ai_client_span) = spans + assert ai_client_span["name"] == "chat gpt-4" + assert ai_client_span["attributes"]["sentry.origin"] == "auto.ai.openai_agents" + assert ai_client_span["status"] == "error" + else: + with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}), patch( + "agents.models.openai_responses.OpenAIResponsesModel.get_response" + ) as mock_get_response: + mock_get_response.side_effect = Exception("Model Error") + + sentry_init( + integrations=[ + OpenAIAgentsIntegration(), + LoggingIntegration(event_level=logging.CRITICAL), + ], + traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) + events = capture_events() - assert transaction["transaction"] == "test_agent workflow" - assert ( - transaction["contexts"]["trace"]["origin"] - == 
"auto.ai.openai_agents" + with pytest.raises(Exception, match="Model Error"): + await agents.Runner.run( + test_agent, "Test input", run_config=test_run_config ) - assert invoke_agent_span["description"] == "invoke_agent test_agent" - assert invoke_agent_span["origin"] == "auto.ai.openai_agents" + ( + error_event, + transaction, + ) = events + + assert error_event["exception"]["values"][0]["type"] == "Exception" + assert error_event["exception"]["values"][0]["value"] == "Model Error" + assert ( + error_event["exception"]["values"][0]["mechanism"]["type"] + == "openai_agents" + ) + + spans = transaction["spans"] + (invoke_agent_span, ai_client_span) = spans + + assert transaction["transaction"] == "test_agent workflow" + assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" + + assert invoke_agent_span["description"] == "invoke_agent test_agent" + assert invoke_agent_span["origin"] == "auto.ai.openai_agents" - assert ai_client_span["description"] == "chat gpt-4" - assert ai_client_span["origin"] == "auto.ai.openai_agents" - assert ai_client_span["status"] == "internal_error" - assert ai_client_span["tags"]["status"] == "internal_error" + assert ai_client_span["description"] == "chat gpt-4" + assert ai_client_span["origin"] == "auto.ai.openai_agents" + assert ai_client_span["status"] == "internal_error" + assert ai_client_span["tags"]["status"] == "internal_error" @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @@ -2636,41 +2676,32 @@ async def test_error_captures_input_data( request=model_request, ) - with patch.object( - agent.model._client._client, - "send", - return_value=response, - ) as _: - sentry_init( - integrations=[ - OpenAIAgentsIntegration(), - LoggingIntegration(event_level=logging.CRITICAL), - ], - traces_sample_rate=1.0, - send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, - ) + if stream_gen_ai_spans: + with patch.object( + agent.model._client._client, + "send", + 
return_value=response, + ) as _: + sentry_init( + integrations=[ + OpenAIAgentsIntegration(), + LoggingIntegration(event_level=logging.CRITICAL), + ], + traces_sample_rate=1.0, + send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) - if stream_gen_ai_spans: items = capture_items("event", "span") - else: - events = capture_events() - with pytest.raises(InternalServerError, match="Error code: 500"): - await agents.Runner.run(agent, "Test input", run_config=test_run_config) + with pytest.raises(InternalServerError, match="Error code: 500"): + await agents.Runner.run(agent, "Test input", run_config=test_run_config) - if stream_gen_ai_spans: (error_event,) = (item.payload for item in items if item.type == "event") - else: - ( - error_event, - transaction, - ) = events - assert error_event["exception"]["values"][0]["type"] == "InternalServerError" - assert error_event["exception"]["values"][0]["value"] == "Error code: 500" + assert error_event["exception"]["values"][0]["type"] == "InternalServerError" + assert error_event["exception"]["values"][0]["value"] == "Error code: 500" - if stream_gen_ai_spans: spans = [item.payload for item in items if item.type == "span"] ai_client_span = [ s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" @@ -2680,7 +2711,43 @@ async def test_error_captures_input_data( assert ai_client_span["status"] == "error" assert "gen_ai.request.messages" in ai_client_span["attributes"] + request_messages = safe_serialize( + [ + {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, + ] + ) + assert ( + ai_client_span["attributes"]["gen_ai.request.messages"] == request_messages + ) else: + with patch.object( + agent.model._client._client, + "send", + return_value=response, + ) as _: + sentry_init( + integrations=[ + OpenAIAgentsIntegration(), + LoggingIntegration(event_level=logging.CRITICAL), + ], + traces_sample_rate=1.0, + send_default_pii=True, + 
_experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) + + events = capture_events() + + with pytest.raises(InternalServerError, match="Error code: 500"): + await agents.Runner.run(agent, "Test input", run_config=test_run_config) + + ( + error_event, + transaction, + ) = events + + assert error_event["exception"]["values"][0]["type"] == "InternalServerError" + assert error_event["exception"]["values"][0]["value"] == "Error code: 500" + spans = transaction["spans"] ai_client_span = [s for s in spans if s["op"] == "gen_ai.chat"][0] @@ -2689,16 +2756,11 @@ async def test_error_captures_input_data( assert ai_client_span["tags"]["status"] == "internal_error" assert "gen_ai.request.messages" in ai_client_span["data"] - request_messages = safe_serialize( - [ - {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, - ] - ) - if stream_gen_ai_spans: - assert ( - ai_client_span["attributes"]["gen_ai.request.messages"] == request_messages + request_messages = safe_serialize( + [ + {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, + ] ) - else: assert ai_client_span["data"]["gen_ai.request.messages"] == request_messages @@ -2711,8 +2773,8 @@ async def test_span_status_error( test_agent, stream_gen_ai_spans, ): - with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}): - with patch( + if stream_gen_ai_spans: + with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}), patch( "agents.models.openai_responses.OpenAIResponsesModel.get_response" ) as mock_get_response: mock_get_response.side_effect = ValueError("Model Error") @@ -2726,35 +2788,46 @@ async def test_span_status_error( _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - if stream_gen_ai_spans: - items = capture_items("event", "transaction", "span") + items = capture_items("event", "transaction", "span") - with pytest.raises(ValueError, match="Model Error"): - await agents.Runner.run( - test_agent, "Test input", run_config=test_run_config - ) + with 
pytest.raises(ValueError, match="Model Error"): + await agents.Runner.run( + test_agent, "Test input", run_config=test_run_config + ) - (error,) = (item.payload for item in items if item.type == "event") - assert error["level"] == "error" + (error,) = (item.payload for item in items if item.type == "event") + assert error["level"] == "error" - spans = [item.payload for item in items if item.type == "span"] - assert spans[0]["status"] == "error" + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["status"] == "error" - (transaction,) = ( - item.payload for item in items if item.type == "transaction" - ) - else: - events = capture_events() + (transaction,) = (item.payload for item in items if item.type == "transaction") + else: + with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}), patch( + "agents.models.openai_responses.OpenAIResponsesModel.get_response" + ) as mock_get_response: + mock_get_response.side_effect = ValueError("Model Error") + + sentry_init( + integrations=[ + OpenAIAgentsIntegration(), + LoggingIntegration(event_level=logging.CRITICAL), + ], + traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) + + events = capture_events() - with pytest.raises(ValueError, match="Model Error"): - await agents.Runner.run( - test_agent, "Test input", run_config=test_run_config - ) + with pytest.raises(ValueError, match="Model Error"): + await agents.Runner.run( + test_agent, "Test input", run_config=test_run_config + ) - (error, transaction) = events - assert error["level"] == "error" - assert transaction["spans"][0]["status"] == "internal_error" - assert transaction["spans"][0]["tags"]["status"] == "internal_error" + (error, transaction) = events + assert error["level"] == "error" + assert transaction["spans"][0]["status"] == "internal_error" + assert transaction["spans"][0]["tags"]["status"] == "internal_error" assert transaction["contexts"]["trace"]["status"] == "internal_error" @@ -2850,19 
+2923,19 @@ async def test_mcp_tool_execution_spans( serialize_pydantic=True, ) - with patch.object( - agent.model._client._client, - "send", - side_effect=[mcp_response, final_response], - ) as _: - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, - ) + if stream_gen_ai_spans: + with patch.object( + agent.model._client._client, + "send", + side_effect=[mcp_response, final_response], + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) - if stream_gen_ai_spans: items = capture_items("span", "transaction") await agents.Runner.run( @@ -2871,32 +2944,43 @@ async def test_mcp_tool_execution_spans( run_config=test_run_config, ) - spans = [item.payload for item in items if item.type == "span"] + spans = [item.payload for item in items if item.type == "span"] - # Find the MCP execute_tool span - mcp_tool_span = None - for span in spans: - if span.get("name") == "execute_tool test_mcp_tool": - mcp_tool_span = span - break + # Find the MCP execute_tool span + mcp_tool_span = None + for span in spans: + if span.get("name") == "execute_tool test_mcp_tool": + mcp_tool_span = span + break - # Verify the MCP tool span was created - assert mcp_tool_span is not None, "MCP execute_tool span was not created" - assert mcp_tool_span["name"] == "execute_tool test_mcp_tool" - assert mcp_tool_span["attributes"]["gen_ai.tool.name"] == "test_mcp_tool" - assert ( - mcp_tool_span["attributes"]["gen_ai.tool.input"] - == '{"query": "search term"}' - ) - assert ( - mcp_tool_span["attributes"]["gen_ai.tool.output"] - == "MCP tool executed successfully" - ) + # Verify the MCP tool span was created + assert mcp_tool_span is not None, "MCP execute_tool span was not created" + assert mcp_tool_span["name"] == "execute_tool test_mcp_tool" + assert 
mcp_tool_span["attributes"]["gen_ai.tool.name"] == "test_mcp_tool" + assert ( + mcp_tool_span["attributes"]["gen_ai.tool.input"] + == '{"query": "search term"}' + ) + assert ( + mcp_tool_span["attributes"]["gen_ai.tool.output"] + == "MCP tool executed successfully" + ) - # Verify no error status since error was None - assert mcp_tool_span.get("status") != "error" - assert mcp_tool_span.get("tags", {}).get("status") != "error" - else: + # Verify no error status since error was None + assert mcp_tool_span.get("status") != "error" + assert mcp_tool_span.get("tags", {}).get("status") != "error" + else: + with patch.object( + agent.model._client._client, + "send", + side_effect=[mcp_response, final_response], + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) events = capture_events() await agents.Runner.run( @@ -2905,31 +2989,29 @@ async def test_mcp_tool_execution_spans( run_config=test_run_config, ) - (transaction,) = events - spans = transaction["spans"] + (transaction,) = events + spans = transaction["spans"] - # Find the MCP execute_tool span - mcp_tool_span = None - for span in spans: - if span.get("description") == "execute_tool test_mcp_tool": - mcp_tool_span = span - break - - # Verify the MCP tool span was created - assert mcp_tool_span is not None, "MCP execute_tool span was not created" - assert mcp_tool_span["description"] == "execute_tool test_mcp_tool" - assert mcp_tool_span["data"]["gen_ai.tool.name"] == "test_mcp_tool" - assert ( - mcp_tool_span["data"]["gen_ai.tool.input"] == '{"query": "search term"}' - ) - assert ( - mcp_tool_span["data"]["gen_ai.tool.output"] - == "MCP tool executed successfully" - ) + # Find the MCP execute_tool span + mcp_tool_span = None + for span in spans: + if span.get("description") == "execute_tool test_mcp_tool": + mcp_tool_span = span + break + + # Verify the MCP tool span was created + 
assert mcp_tool_span is not None, "MCP execute_tool span was not created" + assert mcp_tool_span["description"] == "execute_tool test_mcp_tool" + assert mcp_tool_span["data"]["gen_ai.tool.name"] == "test_mcp_tool" + assert mcp_tool_span["data"]["gen_ai.tool.input"] == '{"query": "search term"}' + assert ( + mcp_tool_span["data"]["gen_ai.tool.output"] + == "MCP tool executed successfully" + ) - # Verify no error status since error was None - assert mcp_tool_span.get("status") != "internal_error" - assert mcp_tool_span.get("tags", {}).get("status") != "internal_error" + # Verify no error status since error was None + assert mcp_tool_span.get("status") != "internal_error" + assert mcp_tool_span.get("tags", {}).get("status") != "internal_error" @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @@ -3023,30 +3105,27 @@ async def test_mcp_tool_execution_with_error( serialize_pydantic=True, ) - with patch.object( - agent.model._client._client, - "send", - side_effect=[mcp_response, final_response], - ) as _: - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, - ) + if stream_gen_ai_spans: + with patch.object( + agent.model._client._client, + "send", + side_effect=[mcp_response, final_response], + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) - if stream_gen_ai_spans: items = capture_items("span", "transaction") - else: - events = capture_events() - await agents.Runner.run( - agent, - "Please use failing MCP tool", - run_config=test_run_config, - ) + await agents.Runner.run( + agent, + "Please use failing MCP tool", + run_config=test_run_config, + ) - if stream_gen_ai_spans: spans = [item.payload for item in items if item.type == "span"] # Find the MCP execute_tool span with error @@ -3066,6 +3145,25 @@ async 
def test_mcp_tool_execution_with_error( # Verify error status was set assert mcp_tool_span["status"] == "error" else: + with patch.object( + agent.model._client._client, + "send", + side_effect=[mcp_response, final_response], + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) + events = capture_events() + + await agents.Runner.run( + agent, + "Please use failing MCP tool", + run_config=test_run_config, + ) + (transaction,) = events spans = transaction["spans"] @@ -3179,19 +3277,19 @@ async def test_mcp_tool_execution_without_pii( serialize_pydantic=True, ) - with patch.object( - agent.model._client._client, - "send", - side_effect=[mcp_response, final_response], - ) as _: - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - send_default_pii=False, # PII disabled - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, - ) + if stream_gen_ai_spans: + with patch.object( + agent.model._client._client, + "send", + side_effect=[mcp_response, final_response], + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=False, # PII disabled + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) - if stream_gen_ai_spans: items = capture_items("span", "transaction") await agents.Runner.run( @@ -3200,24 +3298,35 @@ async def test_mcp_tool_execution_without_pii( run_config=test_run_config, ) - spans = [item.payload for item in items if item.type == "span"] + spans = [item.payload for item in items if item.type == "span"] - # Find the MCP execute_tool span - mcp_tool_span = None - for span in spans: - if span.get("name") == "execute_tool test_mcp_tool": - mcp_tool_span = span - break + # Find the MCP execute_tool span + mcp_tool_span = None + for span in spans: + if span.get("name") == "execute_tool test_mcp_tool": + mcp_tool_span = span + break 
- # Verify the MCP tool span was created but without input/output - assert mcp_tool_span is not None, "MCP execute_tool span was not created" - assert mcp_tool_span["name"] == "execute_tool test_mcp_tool" - assert mcp_tool_span["attributes"]["gen_ai.tool.name"] == "test_mcp_tool" + # Verify the MCP tool span was created but without input/output + assert mcp_tool_span is not None, "MCP execute_tool span was not created" + assert mcp_tool_span["name"] == "execute_tool test_mcp_tool" + assert mcp_tool_span["attributes"]["gen_ai.tool.name"] == "test_mcp_tool" - # Verify input and output are not included when send_default_pii is False - assert "gen_ai.tool.input" not in mcp_tool_span["attributes"] - assert "gen_ai.tool.output" not in mcp_tool_span["attributes"] - else: + # Verify input and output are not included when send_default_pii is False + assert "gen_ai.tool.input" not in mcp_tool_span["attributes"] + assert "gen_ai.tool.output" not in mcp_tool_span["attributes"] + else: + with patch.object( + agent.model._client._client, + "send", + side_effect=[mcp_response, final_response], + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=False, # PII disabled + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) events = capture_events() await agents.Runner.run( @@ -3226,24 +3335,24 @@ async def test_mcp_tool_execution_without_pii( run_config=test_run_config, ) - (transaction,) = events - spans = transaction["spans"] + (transaction,) = events + spans = transaction["spans"] - # Find the MCP execute_tool span - mcp_tool_span = None - for span in spans: - if span.get("description") == "execute_tool test_mcp_tool": - mcp_tool_span = span - break + # Find the MCP execute_tool span + mcp_tool_span = None + for span in spans: + if span.get("description") == "execute_tool test_mcp_tool": + mcp_tool_span = span + break - # Verify the MCP tool span was created but without input/output - assert mcp_tool_span is 
not None, "MCP execute_tool span was not created" - assert mcp_tool_span["description"] == "execute_tool test_mcp_tool" - assert mcp_tool_span["data"]["gen_ai.tool.name"] == "test_mcp_tool" + # Verify the MCP tool span was created but without input/output + assert mcp_tool_span is not None, "MCP execute_tool span was not created" + assert mcp_tool_span["description"] == "execute_tool test_mcp_tool" + assert mcp_tool_span["data"]["gen_ai.tool.name"] == "test_mcp_tool" - # Verify input and output are not included when send_default_pii is False - assert "gen_ai.tool.input" not in mcp_tool_span["data"] - assert "gen_ai.tool.output" not in mcp_tool_span["data"] + # Verify input and output are not included when send_default_pii is False + assert "gen_ai.tool.input" not in mcp_tool_span["data"] + assert "gen_ai.tool.output" not in mcp_tool_span["data"] @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @@ -3269,50 +3378,67 @@ async def test_multiple_agents_asyncio( nonstreaming_responses_model_response, serialize_pydantic=True ) - with patch.object( - agent.model._client._client, - "send", - return_value=response, - ) as _: - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, - ) - - async def run(): - await agents.Runner.run( - starting_agent=agent, - input="Test input", - run_config=test_run_config, + if stream_gen_ai_spans: + with patch.object( + agent.model._client._client, + "send", + return_value=response, + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - if stream_gen_ai_spans: items = capture_items("span", "transaction") + async def run(): + await agents.Runner.run( + starting_agent=agent, + input="Test input", + run_config=test_run_config, + ) + await asyncio.gather(*[run() for _ in range(3)]) - txn1, txn2, txn3 = ( - item.payload for item in items if 
item.type == "transaction" - ) + txn1, txn2, txn3 = ( + item.payload for item in items if item.type == "transaction" + ) - assert txn1["transaction"] == "test_agent workflow" - assert txn2["transaction"] == "test_agent workflow" - else: + assert txn1["transaction"] == "test_agent workflow" + assert txn2["transaction"] == "test_agent workflow" + assert txn3["transaction"] == "test_agent workflow" + else: + with patch.object( + agent.model._client._client, + "send", + return_value=response, + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) events = capture_events() - await asyncio.gather(*[run() for _ in range(3)]) + async def run(): + await agents.Runner.run( + starting_agent=agent, + input="Test input", + run_config=test_run_config, + ) - assert len(events) == 3 - txn1, txn2, txn3 = events + await asyncio.gather(*[run() for _ in range(3)]) - assert txn1["type"] == "transaction" - assert txn1["transaction"] == "test_agent workflow" - assert txn2["type"] == "transaction" - assert txn2["transaction"] == "test_agent workflow" - assert txn3["type"] == "transaction" + assert len(events) == 3 + txn1, txn2, txn3 = events - assert txn3["transaction"] == "test_agent workflow" + assert txn1["type"] == "transaction" + assert txn1["transaction"] == "test_agent workflow" + assert txn2["type"] == "transaction" + assert txn2["transaction"] == "test_agent workflow" + assert txn3["type"] == "transaction" + assert txn3["transaction"] == "test_agent workflow" # Test input messages with mixed roles including "ai" @@ -3431,19 +3557,18 @@ def failing_tool(message: str) -> str: serialize_pydantic=True, ) - with patch.object( - agent_with_tool.model._client._client, - "send", - side_effect=[tool_response, final_response], - ) as _: - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - send_default_pii=True, - _experiments={"stream_gen_ai_spans": 
stream_gen_ai_spans}, - ) - - if stream_gen_ai_spans: + if stream_gen_ai_spans: + with patch.object( + agent_with_tool.model._client._client, + "send", + side_effect=[tool_response, final_response], + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) items = capture_items("span", "transaction") # Note: The agents library catches tool exceptions internally, @@ -3454,27 +3579,38 @@ def failing_tool(message: str) -> str: run_config=test_run_config, ) - spans = [item.payload for item in items if item.type == "span"] + spans = [item.payload for item in items if item.type == "span"] - # Find the execute_tool span - execute_tool_span = None - for span in spans: - description = span.get("name", "") - if description is not None and description.startswith( - "execute_tool failing_tool" - ): - execute_tool_span = span - break + # Find the execute_tool span + execute_tool_span = None + for span in spans: + description = span.get("name", "") + if description is not None and description.startswith( + "execute_tool failing_tool" + ): + execute_tool_span = span + break - # Verify the execute_tool span was created - assert execute_tool_span is not None, "execute_tool span was not created" - assert execute_tool_span["name"] == "execute_tool failing_tool" - assert execute_tool_span["attributes"]["gen_ai.tool.name"] == "failing_tool" + # Verify the execute_tool span was created + assert execute_tool_span is not None, "execute_tool span was not created" + assert execute_tool_span["name"] == "execute_tool failing_tool" + assert execute_tool_span["attributes"]["gen_ai.tool.name"] == "failing_tool" - # Verify error status was set (this is the key test for our patch) - # The span should be marked as error because the tool execution failed - assert execute_tool_span["status"] == "error" - else: + # Verify error status was set (this is the key test for our 
patch) + # The span should be marked as error because the tool execution failed + assert execute_tool_span["status"] == "error" + else: + with patch.object( + agent_with_tool.model._client._client, + "send", + side_effect=[tool_response, final_response], + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) events = capture_events() # Note: The agents library catches tool exceptions internally, @@ -3485,28 +3621,28 @@ def failing_tool(message: str) -> str: run_config=test_run_config, ) - (transaction,) = events - spans = transaction["spans"] + (transaction,) = events + spans = transaction["spans"] - # Find the execute_tool span - execute_tool_span = None - for span in spans: - description = span.get("description", "") - if description is not None and description.startswith( - "execute_tool failing_tool" - ): - execute_tool_span = span - break + # Find the execute_tool span + execute_tool_span = None + for span in spans: + description = span.get("description", "") + if description is not None and description.startswith( + "execute_tool failing_tool" + ): + execute_tool_span = span + break - # Verify the execute_tool span was created - assert execute_tool_span is not None, "execute_tool span was not created" - assert execute_tool_span["description"] == "execute_tool failing_tool" - assert execute_tool_span["data"]["gen_ai.tool.name"] == "failing_tool" + # Verify the execute_tool span was created + assert execute_tool_span is not None, "execute_tool span was not created" + assert execute_tool_span["description"] == "execute_tool failing_tool" + assert execute_tool_span["data"]["gen_ai.tool.name"] == "failing_tool" - # Verify error status was set (this is the key test for our patch) - # The span should be marked as error because the tool execution failed - assert execute_tool_span["status"] == "internal_error" - assert 
execute_tool_span["tags"]["status"] == "internal_error" + # Verify error status was set (this is the key test for our patch) + # The span should be marked as error because the tool execution failed + assert execute_tool_span["status"] == "internal_error" + assert execute_tool_span["tags"]["status"] == "internal_error" @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @@ -3566,19 +3702,18 @@ async def test_invoke_agent_span_includes_usage_data( serialize_pydantic=True, ) - with patch.object( - agent.model._client._client, - "send", - return_value=response, - ) as _: - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, - ) - - if stream_gen_ai_spans: + if stream_gen_ai_spans: + with patch.object( + agent.model._client._client, + "send", + return_value=response, + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) items = capture_items("span", "transaction") result = await agents.Runner.run( @@ -3587,30 +3722,38 @@ async def test_invoke_agent_span_includes_usage_data( assert result is not None - spans = [item.payload for item in items if item.type == "span"] - invoke_agent_span = next( - span - for span in spans - if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT - ) + spans = [item.payload for item in items if item.type == "span"] + invoke_agent_span = next( + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT + ) - # Verify invoke_agent span has usage data from context_wrapper - assert invoke_agent_span["name"] == "invoke_agent test_agent" - assert "gen_ai.usage.input_tokens" in invoke_agent_span["attributes"] - assert "gen_ai.usage.output_tokens" in invoke_agent_span["attributes"] - assert "gen_ai.usage.total_tokens" in invoke_agent_span["attributes"] + # 
Verify invoke_agent span has usage data from context_wrapper + assert invoke_agent_span["name"] == "invoke_agent test_agent" + assert "gen_ai.usage.input_tokens" in invoke_agent_span["attributes"] + assert "gen_ai.usage.output_tokens" in invoke_agent_span["attributes"] + assert "gen_ai.usage.total_tokens" in invoke_agent_span["attributes"] - assert invoke_agent_span["attributes"]["gen_ai.usage.input_tokens"] == 10 - assert invoke_agent_span["attributes"]["gen_ai.usage.output_tokens"] == 20 - assert invoke_agent_span["attributes"]["gen_ai.usage.total_tokens"] == 30 - assert ( - invoke_agent_span["attributes"]["gen_ai.usage.input_tokens.cached"] == 0 - ) - assert ( - invoke_agent_span["attributes"]["gen_ai.usage.output_tokens.reasoning"] - == 5 + assert invoke_agent_span["attributes"]["gen_ai.usage.input_tokens"] == 10 + assert invoke_agent_span["attributes"]["gen_ai.usage.output_tokens"] == 20 + assert invoke_agent_span["attributes"]["gen_ai.usage.total_tokens"] == 30 + assert invoke_agent_span["attributes"]["gen_ai.usage.input_tokens.cached"] == 0 + assert ( + invoke_agent_span["attributes"]["gen_ai.usage.output_tokens.reasoning"] == 5 + ) + else: + with patch.object( + agent.model._client._client, + "send", + return_value=response, + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - else: events = capture_events() result = await agents.Runner.run( @@ -3619,25 +3762,23 @@ async def test_invoke_agent_span_includes_usage_data( assert result is not None - (transaction,) = events - spans = transaction["spans"] - invoke_agent_span = next( - span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT - ) + (transaction,) = events + spans = transaction["spans"] + invoke_agent_span = next( + span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT + ) - # Verify invoke_agent span has usage data from context_wrapper - assert 
invoke_agent_span["description"] == "invoke_agent test_agent" - assert "gen_ai.usage.input_tokens" in invoke_agent_span["data"] - assert "gen_ai.usage.output_tokens" in invoke_agent_span["data"] - assert "gen_ai.usage.total_tokens" in invoke_agent_span["data"] - - assert invoke_agent_span["data"]["gen_ai.usage.input_tokens"] == 10 - assert invoke_agent_span["data"]["gen_ai.usage.output_tokens"] == 20 - assert invoke_agent_span["data"]["gen_ai.usage.total_tokens"] == 30 - assert invoke_agent_span["data"]["gen_ai.usage.input_tokens.cached"] == 0 - assert ( - invoke_agent_span["data"]["gen_ai.usage.output_tokens.reasoning"] == 5 - ) + # Verify invoke_agent span has usage data from context_wrapper + assert invoke_agent_span["description"] == "invoke_agent test_agent" + assert "gen_ai.usage.input_tokens" in invoke_agent_span["data"] + assert "gen_ai.usage.output_tokens" in invoke_agent_span["data"] + assert "gen_ai.usage.total_tokens" in invoke_agent_span["data"] + + assert invoke_agent_span["data"]["gen_ai.usage.input_tokens"] == 10 + assert invoke_agent_span["data"]["gen_ai.usage.output_tokens"] == 20 + assert invoke_agent_span["data"]["gen_ai.usage.total_tokens"] == 30 + assert invoke_agent_span["data"]["gen_ai.usage.input_tokens.cached"] == 0 + assert invoke_agent_span["data"]["gen_ai.usage.output_tokens.reasoning"] == 5 @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @@ -3697,19 +3838,18 @@ async def test_ai_client_span_includes_response_model( serialize_pydantic=True, ) - with patch.object( - agent.model._client._client, - "send", - return_value=response, - ) as _: - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, - ) - - if stream_gen_ai_spans: + if stream_gen_ai_spans: + with patch.object( + agent.model._client._client, + "send", + return_value=response, + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + 
traces_sample_rate=1.0, + send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) items = capture_items("span", "transaction") result = await agents.Runner.run( @@ -3718,21 +3858,30 @@ async def test_ai_client_span_includes_response_model( assert result is not None - spans = [item.payload for item in items if item.type == "span"] - ai_client_span = next( - span - for span in spans - if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - ) + spans = [item.payload for item in items if item.type == "span"] + ai_client_span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) - # Verify ai_client span has response model from API response - assert ai_client_span["name"] == "chat gpt-4" - assert "gen_ai.response.model" in ai_client_span["attributes"] - assert ( - ai_client_span["attributes"]["gen_ai.response.model"] - == "gpt-4.1-2025-04-14" + # Verify ai_client span has response model from API response + assert ai_client_span["name"] == "chat gpt-4" + assert "gen_ai.response.model" in ai_client_span["attributes"] + assert ( + ai_client_span["attributes"]["gen_ai.response.model"] + == "gpt-4.1-2025-04-14" + ) + else: + with patch.object( + agent.model._client._client, + "send", + return_value=response, + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - else: events = capture_events() result = await agents.Runner.run( @@ -3741,18 +3890,14 @@ async def test_ai_client_span_includes_response_model( assert result is not None - (transaction,) = events - spans = transaction["spans"] - ai_client_span = next( - span for span in spans if span["op"] == OP.GEN_AI_CHAT - ) + (transaction,) = events + spans = transaction["spans"] + ai_client_span = next(span for span in spans if span["op"] == OP.GEN_AI_CHAT) - # Verify ai_client span has response model from API response - assert 
ai_client_span["description"] == "chat gpt-4" - assert "gen_ai.response.model" in ai_client_span["data"] - assert ( - ai_client_span["data"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" - ) + # Verify ai_client span has response model from API response + assert ai_client_span["description"] == "chat gpt-4" + assert "gen_ai.response.model" in ai_client_span["data"] + assert ai_client_span["data"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @@ -3817,18 +3962,18 @@ async def test_ai_client_span_response_model_with_chat_completions( serialize_pydantic=True, ) - with patch.object( - agent.model._client._client, - "send", - return_value=response, - ) as _: - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, - ) + if stream_gen_ai_spans: + with patch.object( + agent.model._client._client, + "send", + return_value=response, + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) - if stream_gen_ai_spans: items = capture_items("span", "transaction") result = await agents.Runner.run( @@ -3837,20 +3982,28 @@ async def test_ai_client_span_response_model_with_chat_completions( assert result is not None - spans = [item.payload for item in items if item.type == "span"] - ai_client_span = next( - span - for span in spans - if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - ) + spans = [item.payload for item in items if item.type == "span"] + ai_client_span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) - # Verify response model from API response is captured - assert "gen_ai.response.model" in ai_client_span["attributes"] - assert ( - ai_client_span["attributes"]["gen_ai.response.model"] - == "gpt-4o-mini-2024-07-18" + # Verify response model from API response is 
captured + assert "gen_ai.response.model" in ai_client_span["attributes"] + assert ( + ai_client_span["attributes"]["gen_ai.response.model"] + == "gpt-4o-mini-2024-07-18" + ) + else: + with patch.object( + agent.model._client._client, + "send", + return_value=response, + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - else: events = capture_events() result = await agents.Runner.run( @@ -3859,18 +4012,15 @@ async def test_ai_client_span_response_model_with_chat_completions( assert result is not None - (transaction,) = events - spans = transaction["spans"] - ai_client_span = next( - span for span in spans if span["op"] == OP.GEN_AI_CHAT - ) + (transaction,) = events + spans = transaction["spans"] + ai_client_span = next(span for span in spans if span["op"] == OP.GEN_AI_CHAT) - # Verify response model from API response is captured - assert "gen_ai.response.model" in ai_client_span["data"] - assert ( - ai_client_span["data"]["gen_ai.response.model"] - == "gpt-4o-mini-2024-07-18" - ) + # Verify response model from API response is captured + assert "gen_ai.response.model" in ai_client_span["data"] + assert ( + ai_client_span["data"]["gen_ai.response.model"] == "gpt-4o-mini-2024-07-18" + ) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @@ -4122,32 +4272,30 @@ async def test_invoke_agent_span_includes_response_model( assert result is not None - spans = [item.payload for item in items if item.type == "span"] - invoke_agent_span = next( - span - for span in spans - if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT - ) - ai_client_span = next( - span - for span in spans - if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - ) + spans = [item.payload for item in items if item.type == "span"] + invoke_agent_span = next( + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT + ) + ai_client_span = next( + 
span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) - # Verify invoke_agent span has response model from API - assert invoke_agent_span["name"] == "invoke_agent test_agent" - assert "gen_ai.response.model" in invoke_agent_span["attributes"] - assert ( - invoke_agent_span["attributes"]["gen_ai.response.model"] - == "gpt-4.1-2025-04-14" - ) + # Verify invoke_agent span has response model from API + assert invoke_agent_span["name"] == "invoke_agent test_agent" + assert "gen_ai.response.model" in invoke_agent_span["attributes"] + assert ( + invoke_agent_span["attributes"]["gen_ai.response.model"] + == "gpt-4.1-2025-04-14" + ) - # Also verify ai_client span has it - assert "gen_ai.response.model" in ai_client_span["attributes"] - assert ( - ai_client_span["attributes"]["gen_ai.response.model"] - == "gpt-4.1-2025-04-14" - ) + # Also verify ai_client span has it + assert "gen_ai.response.model" in ai_client_span["attributes"] + assert ( + ai_client_span["attributes"]["gen_ai.response.model"] + == "gpt-4.1-2025-04-14" + ) else: with patch.object( agent.model._client._client, @@ -4168,28 +4316,23 @@ async def test_invoke_agent_span_includes_response_model( assert result is not None - (transaction,) = events - spans = transaction["spans"] - invoke_agent_span = next( - span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT - ) - ai_client_span = next( - span for span in spans if span["op"] == OP.GEN_AI_CHAT - ) + (transaction,) = events + spans = transaction["spans"] + invoke_agent_span = next( + span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT + ) + ai_client_span = next(span for span in spans if span["op"] == OP.GEN_AI_CHAT) - # Verify invoke_agent span has response model from API - assert invoke_agent_span["description"] == "invoke_agent test_agent" - assert "gen_ai.response.model" in invoke_agent_span["data"] - assert ( - invoke_agent_span["data"]["gen_ai.response.model"] - == "gpt-4.1-2025-04-14" - ) + # Verify 
invoke_agent span has response model from API + assert invoke_agent_span["description"] == "invoke_agent test_agent" + assert "gen_ai.response.model" in invoke_agent_span["data"] + assert ( + invoke_agent_span["data"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" + ) - # Also verify ai_client span has it - assert "gen_ai.response.model" in ai_client_span["data"] - assert ( - ai_client_span["data"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" - ) + # Also verify ai_client span has it + assert "gen_ai.response.model" in ai_client_span["data"] + assert ai_client_span["data"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @@ -4311,28 +4454,27 @@ def calculator(a: int, b: int) -> int: assert result is not None - spans = [item.payload for item in items if item.type == "span"] + spans = [item.payload for item in items if item.type == "span"] - invoke_agent_span = spans[0] - first_ai_client_span = spans[1] - second_ai_client_span = spans[3] # After tool span + invoke_agent_span = spans[0] + first_ai_client_span = spans[1] + second_ai_client_span = spans[3] # After tool span - # Invoke_agent span uses the LAST response model - assert "gen_ai.response.model" in invoke_agent_span["attributes"] - assert ( - invoke_agent_span["attributes"]["gen_ai.response.model"] - == "gpt-4.1-2025-04-14" - ) + # Invoke_agent span uses the LAST response model + assert "gen_ai.response.model" in invoke_agent_span["attributes"] + assert ( + invoke_agent_span["attributes"]["gen_ai.response.model"] + == "gpt-4.1-2025-04-14" + ) - # Each ai_client span has its own response model from the API - assert ( - first_ai_client_span["attributes"]["gen_ai.response.model"] - == "gpt-4-0613" - ) - assert ( - second_ai_client_span["attributes"]["gen_ai.response.model"] - == "gpt-4.1-2025-04-14" - ) + # Each ai_client span has its own response model from the API + assert ( + first_ai_client_span["attributes"]["gen_ai.response.model"] == 
"gpt-4-0613" + ) + assert ( + second_ai_client_span["attributes"]["gen_ai.response.model"] + == "gpt-4.1-2025-04-14" + ) else: with patch.object( agent_with_tool.model._client._client, @@ -4355,26 +4497,25 @@ def calculator(a: int, b: int) -> int: assert result is not None - (transaction,) = events - spans = transaction["spans"] + (transaction,) = events + spans = transaction["spans"] - invoke_agent_span = spans[0] - first_ai_client_span = spans[1] - second_ai_client_span = spans[3] # After tool span + invoke_agent_span = spans[0] + first_ai_client_span = spans[1] + second_ai_client_span = spans[3] # After tool span - # Invoke_agent span uses the LAST response model - assert "gen_ai.response.model" in invoke_agent_span["data"] - assert ( - invoke_agent_span["data"]["gen_ai.response.model"] - == "gpt-4.1-2025-04-14" - ) + # Invoke_agent span uses the LAST response model + assert "gen_ai.response.model" in invoke_agent_span["data"] + assert ( + invoke_agent_span["data"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" + ) - # Each ai_client span has its own response model from the API - assert first_ai_client_span["data"]["gen_ai.response.model"] == "gpt-4-0613" - assert ( - second_ai_client_span["data"]["gen_ai.response.model"] - == "gpt-4.1-2025-04-14" - ) + # Each ai_client span has its own response model from the API + assert first_ai_client_span["data"]["gen_ai.response.model"] == "gpt-4-0613" + assert ( + second_ai_client_span["data"]["gen_ai.response.model"] + == "gpt-4.1-2025-04-14" + ) def test_openai_agents_message_truncation(sentry_init, capture_items): @@ -4597,28 +4738,25 @@ async def test_streaming_ttft_on_chat_span( agent_with_tool.model._client._client, "send", return_value=response, - ) as _: - with sentry_sdk.start_transaction( - name="test_ttft", sampled=True - ) as transaction: - result = agents.Runner.run_streamed( - agent_with_tool, - "Please use the simple test tool", - run_config=test_run_config, - ) + ) as _, 
sentry_sdk.start_transaction(name="test_ttft", sampled=True) as transaction: + result = agents.Runner.run_streamed( + agent_with_tool, + "Please use the simple test tool", + run_config=test_run_config, + ) - async for event in result.stream_events(): - pass + async for event in result.stream_events(): + pass - # Verify TTFT is recorded on the chat span (must be inside transaction context) - chat_spans = [ - s for s in transaction._span_recorder.spans if s.op == "gen_ai.chat" - ] - assert len(chat_spans) >= 1 - chat_span = chat_spans[0] + # Verify TTFT is recorded on the chat span (must be inside transaction context) + chat_spans = [ + s for s in transaction._span_recorder.spans if s.op == "gen_ai.chat" + ] + assert len(chat_spans) >= 1 + chat_span = chat_spans[0] - assert SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN in chat_span._data - assert chat_span._data.get(SPANDATA.GEN_AI_RESPONSE_STREAMING) is True + assert SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN in chat_span._data + assert chat_span._data.get(SPANDATA.GEN_AI_RESPONSE_STREAMING) is True @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @@ -4671,39 +4809,31 @@ async def test_conversation_id_on_all_spans( assert result is not None - spans = [item.payload for item in items if item.type == "span"] - invoke_agent_span = next( - span - for span in spans - if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT - ) - ai_client_span = next( - span - for span in spans - if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - ) + spans = [item.payload for item in items if item.type == "span"] + invoke_agent_span = next( + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT + ) + ai_client_span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) - # Verify workflow span (transaction) has conversation_id - (transaction,) = ( - item.payload for item in items if item.type == "transaction" - ) + # Verify workflow span 
(transaction) has conversation_id + (transaction,) = (item.payload for item in items if item.type == "transaction") - assert ( - transaction["contexts"]["trace"]["data"]["gen_ai.conversation.id"] - == "conv_test_123" - ) + assert ( + transaction["contexts"]["trace"]["data"]["gen_ai.conversation.id"] + == "conv_test_123" + ) - # Verify invoke_agent span has conversation_id - assert ( - invoke_agent_span["attributes"]["gen_ai.conversation.id"] - == "conv_test_123" - ) + # Verify invoke_agent span has conversation_id + assert ( + invoke_agent_span["attributes"]["gen_ai.conversation.id"] == "conv_test_123" + ) - # Verify ai_client span has conversation_id - assert ( - ai_client_span["attributes"]["gen_ai.conversation.id"] - == "conv_test_123" - ) + # Verify ai_client span has conversation_id + assert ai_client_span["attributes"]["gen_ai.conversation.id"] == "conv_test_123" else: with patch.object( agent.model._client._client, @@ -4726,28 +4856,24 @@ async def test_conversation_id_on_all_spans( assert result is not None - (transaction,) = events - spans = transaction["spans"] - invoke_agent_span = next( - span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT - ) - ai_client_span = next( - span for span in spans if span["op"] == OP.GEN_AI_CHAT - ) + (transaction,) = events + spans = transaction["spans"] + invoke_agent_span = next( + span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT + ) + ai_client_span = next(span for span in spans if span["op"] == OP.GEN_AI_CHAT) - # Verify workflow span (transaction) has conversation_id - assert ( - transaction["contexts"]["trace"]["data"]["gen_ai.conversation.id"] - == "conv_test_123" - ) + # Verify workflow span (transaction) has conversation_id + assert ( + transaction["contexts"]["trace"]["data"]["gen_ai.conversation.id"] + == "conv_test_123" + ) - # Verify invoke_agent span has conversation_id - assert ( - invoke_agent_span["data"]["gen_ai.conversation.id"] == "conv_test_123" - ) + # Verify invoke_agent span 
has conversation_id + assert invoke_agent_span["data"]["gen_ai.conversation.id"] == "conv_test_123" - # Verify ai_client span has conversation_id - assert ai_client_span["data"]["gen_ai.conversation.id"] == "conv_test_123" + # Verify ai_client span has conversation_id + assert ai_client_span["data"]["gen_ai.conversation.id"] == "conv_test_123" @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @@ -4981,30 +5107,24 @@ async def test_no_conversation_id_when_not_provided( assert result is not None - (transaction,) = ( - item.payload for item in items if item.type == "transaction" - ) + (transaction,) = (item.payload for item in items if item.type == "transaction") - spans = [item.payload for item in items if item.type == "span"] - invoke_agent_span = next( - span - for span in spans - if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT - ) - ai_client_span = next( - span - for span in spans - if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - ) + spans = [item.payload for item in items if item.type == "span"] + invoke_agent_span = next( + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT + ) + ai_client_span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) - # Verify conversation_id is NOT set on any spans - assert "gen_ai.conversation.id" not in transaction["contexts"]["trace"].get( - "attributes", {} - ) - assert "gen_ai.conversation.id" not in invoke_agent_span.get( - "attributes", {} - ) - assert "gen_ai.conversation.id" not in ai_client_span.get("attributes", {}) + # Verify conversation_id is NOT set on any spans + assert "gen_ai.conversation.id" not in transaction["contexts"]["trace"].get( + "attributes", {} + ) + assert "gen_ai.conversation.id" not in invoke_agent_span.get("attributes", {}) + assert "gen_ai.conversation.id" not in ai_client_span.get("attributes", {}) else: with patch.object( agent.model._client._client, @@ -5026,18 +5146,16 @@ async def 
test_no_conversation_id_when_not_provided( assert result is not None - (transaction,) = events - spans = transaction["spans"] - invoke_agent_span = next( - span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT - ) - ai_client_span = next( - span for span in spans if span["op"] == OP.GEN_AI_CHAT - ) + (transaction,) = events + spans = transaction["spans"] + invoke_agent_span = next( + span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT + ) + ai_client_span = next(span for span in spans if span["op"] == OP.GEN_AI_CHAT) - # Verify conversation_id is NOT set on any spans - assert "gen_ai.conversation.id" not in transaction["contexts"]["trace"].get( - "data", {} - ) - assert "gen_ai.conversation.id" not in invoke_agent_span.get("data", {}) - assert "gen_ai.conversation.id" not in ai_client_span.get("data", {}) + # Verify conversation_id is NOT set on any spans + assert "gen_ai.conversation.id" not in transaction["contexts"]["trace"].get( + "data", {} + ) + assert "gen_ai.conversation.id" not in invoke_agent_span.get("data", {}) + assert "gen_ai.conversation.id" not in ai_client_span.get("data", {}) From 14e379ff0e373503bd2dd6883abcfb6f62a43374 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 12 May 2026 15:12:57 +0200 Subject: [PATCH 53/84] fix pydantic-ai test --- tests/integrations/pydantic_ai/test_pydantic_ai.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tests/integrations/pydantic_ai/test_pydantic_ai.py b/tests/integrations/pydantic_ai/test_pydantic_ai.py index d60058e4ce..b2dfe76988 100644 --- a/tests/integrations/pydantic_ai/test_pydantic_ai.py +++ b/tests/integrations/pydantic_ai/test_pydantic_ai.py @@ -1366,9 +1366,6 @@ async def test_message_history( _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - # First message - await agent.run("Hello, I'm Alice") - # Second message with history from pydantic_ai import messages @@ -1385,6 +1382,9 @@ async def test_message_history( if 
stream_gen_ai_spans: items = capture_items("transaction", "span") + # First message + await agent.run("Hello, I'm Alice") + await agent.run("What is my name?", message_history=history) # We should have 2 transactions @@ -1407,6 +1407,9 @@ async def test_message_history( else: events = capture_events() + # First message + await agent.run("Hello, I'm Alice") + await agent.run("What is my name?", message_history=history) # We should have 2 transactions From 596db319bb01825003371d0331ce731b64905895 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 12 May 2026 15:31:07 +0200 Subject: [PATCH 54/84] fix tracing tests --- tests/tracing/test_decorator.py | 514 +++++++++++++++++++++----------- tests/tracing/test_misc.py | 35 ++- 2 files changed, 363 insertions(+), 186 deletions(-) diff --git a/tests/tracing/test_decorator.py b/tests/tracing/test_decorator.py index a71ca5588f..30c14b8ea6 100644 --- a/tests/tracing/test_decorator.py +++ b/tests/tracing/test_decorator.py @@ -121,12 +121,13 @@ async def _some_function_traced(a, b, c): ) -def test_span_templates_ai_dicts(sentry_init, capture_items): - sentry_init( - traces_sample_rate=1.0, - ) - items = capture_items("span") - +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_span_templates_ai_dicts( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): @sentry_sdk.trace(template=SPANTEMPLATE.AI_TOOL) def my_tool(arg1, arg2): return { @@ -165,89 +166,157 @@ def my_agent(): presence_penalty=2.0, ) - with sentry_sdk.start_transaction(name="test-transaction"): - my_agent() + if stream_gen_ai_spans: + sentry_init( + traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) + items = capture_items("span") - (agent_span, tool_span, chat_span) = ( - item.payload for item in items if item.type == "span" - ) + with sentry_sdk.start_transaction(name="test-transaction"): + my_agent() - assert ( - agent_span["name"] - == "invoke_agent 
test_decorator.test_span_templates_ai_dicts..my_agent" - ) - assert agent_span["attributes"] == { - "gen_ai.agent.name": "test_decorator.test_span_templates_ai_dicts..my_agent", - "gen_ai.operation.name": "invoke_agent", - "sentry.environment": "production", - "sentry.op": "gen_ai.invoke_agent", - "sentry.origin": "manual", - "sentry.release": mock.ANY, - "sentry.sdk.name": "sentry.python", - "sentry.sdk.version": mock.ANY, - "sentry.segment.id": mock.ANY, - "sentry.segment.name": "test-transaction", - "thread.id": mock.ANY, - "thread.name": mock.ANY, - } - - assert ( - tool_span["name"] - == "execute_tool test_decorator.test_span_templates_ai_dicts..my_tool" - ) - assert tool_span["attributes"] == { - "gen_ai.tool.name": "test_decorator.test_span_templates_ai_dicts..my_tool", - "gen_ai.operation.name": "execute_tool", - "gen_ai.usage.input_tokens": 10, - "gen_ai.usage.output_tokens": 20, - "gen_ai.usage.total_tokens": 30, - "sentry.environment": "production", - "sentry.op": "gen_ai.execute_tool", - "sentry.origin": "manual", - "sentry.release": mock.ANY, - "sentry.sdk.name": "sentry.python", - "sentry.sdk.version": mock.ANY, - "sentry.segment.id": mock.ANY, - "sentry.segment.name": "test-transaction", - "thread.id": mock.ANY, - "thread.name": mock.ANY, - } - assert "gen_ai.tool.description" not in tool_span["attributes"] - - assert chat_span["name"] == "chat my-gpt-4o-mini" - assert chat_span["attributes"] == { - "gen_ai.operation.name": "chat", - "gen_ai.request.frequency_penalty": 1.0, - "gen_ai.request.max_tokens": 100, - "gen_ai.request.messages": "[{'role': 'user', 'content': 'What is the weather in Tokyo?'}, {'role': 'system', 'content': 'You are a helpful assistant that can answer questions about the weather.'}]", - "gen_ai.request.model": "my-gpt-4o-mini", - "gen_ai.request.presence_penalty": 2.0, - "gen_ai.request.temperature": 0.5, - "gen_ai.request.top_k": 40, - "gen_ai.request.top_p": 0.9, - "gen_ai.response.model": "my-gpt-4o-mini-v123", - 
"gen_ai.usage.input_tokens": 11, - "gen_ai.usage.output_tokens": 22, - "gen_ai.usage.total_tokens": 33, - "sentry.environment": "production", - "sentry.op": "gen_ai.chat", - "sentry.origin": "manual", - "sentry.release": mock.ANY, - "sentry.sdk.name": "sentry.python", - "sentry.sdk.version": mock.ANY, - "sentry.segment.id": mock.ANY, - "sentry.segment.name": "test-transaction", - "thread.id": mock.ANY, - "thread.name": mock.ANY, - } - - -def test_span_templates_ai_objects(sentry_init, capture_items): - sentry_init( - traces_sample_rate=1.0, - ) - items = capture_items("span") + (agent_span, tool_span, chat_span) = ( + item.payload for item in items if item.type == "span" + ) + + assert ( + agent_span["name"] + == "invoke_agent test_decorator.test_span_templates_ai_dicts..my_agent" + ) + assert agent_span["attributes"] == { + "gen_ai.agent.name": "test_decorator.test_span_templates_ai_dicts..my_agent", + "gen_ai.operation.name": "invoke_agent", + "sentry.environment": "production", + "sentry.op": "gen_ai.invoke_agent", + "sentry.origin": "manual", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": mock.ANY, + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "test-transaction", + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } + assert ( + tool_span["name"] + == "execute_tool test_decorator.test_span_templates_ai_dicts..my_tool" + ) + assert tool_span["attributes"] == { + "gen_ai.tool.name": "test_decorator.test_span_templates_ai_dicts..my_tool", + "gen_ai.operation.name": "execute_tool", + "gen_ai.usage.input_tokens": 10, + "gen_ai.usage.output_tokens": 20, + "gen_ai.usage.total_tokens": 30, + "sentry.environment": "production", + "sentry.op": "gen_ai.execute_tool", + "sentry.origin": "manual", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": mock.ANY, + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "test-transaction", + "thread.id": mock.ANY, + "thread.name": 
mock.ANY, + } + assert "gen_ai.tool.description" not in tool_span["attributes"] + + assert chat_span["name"] == "chat my-gpt-4o-mini" + assert chat_span["attributes"] == { + "gen_ai.operation.name": "chat", + "gen_ai.request.frequency_penalty": 1.0, + "gen_ai.request.max_tokens": 100, + "gen_ai.request.messages": "[{'role': 'user', 'content': 'What is the weather in Tokyo?'}, {'role': 'system', 'content': 'You are a helpful assistant that can answer questions about the weather.'}]", + "gen_ai.request.model": "my-gpt-4o-mini", + "gen_ai.request.presence_penalty": 2.0, + "gen_ai.request.temperature": 0.5, + "gen_ai.request.top_k": 40, + "gen_ai.request.top_p": 0.9, + "gen_ai.response.model": "my-gpt-4o-mini-v123", + "gen_ai.usage.input_tokens": 11, + "gen_ai.usage.output_tokens": 22, + "gen_ai.usage.total_tokens": 33, + "sentry.environment": "production", + "sentry.op": "gen_ai.chat", + "sentry.origin": "manual", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": mock.ANY, + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "test-transaction", + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } + else: + sentry_init( + traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) + events = capture_events() + + with sentry_sdk.start_transaction(name="test-transaction"): + my_agent() + + (event,) = events + (agent_span, tool_span, chat_span) = event["spans"] + + assert agent_span["op"] == "gen_ai.invoke_agent" + assert ( + agent_span["description"] + == "invoke_agent test_decorator.test_span_templates_ai_dicts..my_agent" + ) + assert agent_span["data"] == { + "gen_ai.agent.name": "test_decorator.test_span_templates_ai_dicts..my_agent", + "gen_ai.operation.name": "invoke_agent", + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } + + assert tool_span["op"] == "gen_ai.execute_tool" + assert ( + tool_span["description"] + == "execute_tool 
test_decorator.test_span_templates_ai_dicts..my_tool" + ) + assert tool_span["data"] == { + "gen_ai.tool.name": "test_decorator.test_span_templates_ai_dicts..my_tool", + "gen_ai.operation.name": "execute_tool", + "gen_ai.usage.input_tokens": 10, + "gen_ai.usage.output_tokens": 20, + "gen_ai.usage.total_tokens": 30, + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } + assert "gen_ai.tool.description" not in tool_span["data"] + + assert chat_span["op"] == "gen_ai.chat" + assert chat_span["description"] == "chat my-gpt-4o-mini" + assert chat_span["data"] == { + "gen_ai.operation.name": "chat", + "gen_ai.request.frequency_penalty": 1.0, + "gen_ai.request.max_tokens": 100, + "gen_ai.request.messages": "[{'role': 'user', 'content': 'What is the weather in Tokyo?'}, {'role': 'system', 'content': 'You are a helpful assistant that can answer questions about the weather.'}]", + "gen_ai.request.model": "my-gpt-4o-mini", + "gen_ai.request.presence_penalty": 2.0, + "gen_ai.request.temperature": 0.5, + "gen_ai.request.top_k": 40, + "gen_ai.request.top_p": 0.9, + "gen_ai.response.model": "my-gpt-4o-mini-v123", + "gen_ai.usage.input_tokens": 11, + "gen_ai.usage.output_tokens": 22, + "gen_ai.usage.total_tokens": 33, + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_span_templates_ai_objects( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): @sentry_sdk.trace(template=SPANTEMPLATE.AI_TOOL) def my_tool(arg1, arg2): """This is a tool function.""" @@ -290,91 +359,155 @@ def my_agent(): presence_penalty=2.0, ) - with sentry_sdk.start_transaction(name="test-transaction"): - my_agent() + if stream_gen_ai_spans: + sentry_init( + traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) + items = capture_items("span") - (agent_span, tool_span, chat_span) = ( - item.payload for item in items if item.type == "span" - ) + with 
sentry_sdk.start_transaction(name="test-transaction"): + my_agent() - assert ( - agent_span["name"] - == "invoke_agent test_decorator.test_span_templates_ai_objects..my_agent" - ) - assert agent_span["attributes"] == { - "gen_ai.agent.name": "test_decorator.test_span_templates_ai_objects..my_agent", - "gen_ai.operation.name": "invoke_agent", - "sentry.environment": "production", - "sentry.op": "gen_ai.invoke_agent", - "sentry.origin": "manual", - "sentry.release": mock.ANY, - "sentry.sdk.name": "sentry.python", - "sentry.sdk.version": mock.ANY, - "sentry.segment.id": mock.ANY, - "sentry.segment.name": "test-transaction", - "thread.id": mock.ANY, - "thread.name": mock.ANY, - } - - assert ( - tool_span["name"] - == "execute_tool test_decorator.test_span_templates_ai_objects..my_tool" - ) - assert tool_span["attributes"] == { - "gen_ai.tool.name": "test_decorator.test_span_templates_ai_objects..my_tool", - "gen_ai.tool.description": "This is a tool function.", - "gen_ai.operation.name": "execute_tool", - "gen_ai.usage.input_tokens": 10, - "gen_ai.usage.output_tokens": 20, - "gen_ai.usage.total_tokens": 30, - "sentry.environment": "production", - "sentry.op": "gen_ai.execute_tool", - "sentry.origin": "manual", - "sentry.release": mock.ANY, - "sentry.sdk.name": "sentry.python", - "sentry.sdk.version": mock.ANY, - "sentry.segment.id": mock.ANY, - "sentry.segment.name": "test-transaction", - "thread.id": mock.ANY, - "thread.name": mock.ANY, - } - - assert chat_span["name"] == "chat my-gpt-4o-mini" - assert chat_span["attributes"] == { - "gen_ai.operation.name": "chat", - "gen_ai.request.frequency_penalty": 1.0, - "gen_ai.request.max_tokens": 100, - "gen_ai.request.messages": "[{'role': 'user', 'content': 'What is the weather in Tokyo?'}, {'role': 'system', 'content': 'You are a helpful assistant that can answer questions about the weather.'}]", - "gen_ai.request.model": "my-gpt-4o-mini", - "gen_ai.request.presence_penalty": 2.0, - "gen_ai.request.temperature": 0.5, - 
"gen_ai.request.top_k": 40, - "gen_ai.request.top_p": 0.9, - "gen_ai.response.model": "my-gpt-4o-mini-v123", - "gen_ai.usage.input_tokens": 11, - "gen_ai.usage.output_tokens": 22, - "gen_ai.usage.total_tokens": 33, - "sentry.environment": "production", - "sentry.op": "gen_ai.chat", - "sentry.origin": "manual", - "sentry.release": mock.ANY, - "sentry.sdk.name": "sentry.python", - "sentry.sdk.version": mock.ANY, - "sentry.segment.id": mock.ANY, - "sentry.segment.name": "test-transaction", - "thread.id": mock.ANY, - "thread.name": mock.ANY, - } + (agent_span, tool_span, chat_span) = ( + item.payload for item in items if item.type == "span" + ) + assert ( + agent_span["name"] + == "invoke_agent test_decorator.test_span_templates_ai_objects..my_agent" + ) + assert agent_span["attributes"] == { + "gen_ai.agent.name": "test_decorator.test_span_templates_ai_objects..my_agent", + "gen_ai.operation.name": "invoke_agent", + "sentry.environment": "production", + "sentry.op": "gen_ai.invoke_agent", + "sentry.origin": "manual", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": mock.ANY, + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "test-transaction", + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } -@pytest.mark.parametrize("send_default_pii", [True, False]) -def test_span_templates_ai_pii(sentry_init, capture_items, send_default_pii): - sentry_init( - traces_sample_rate=1.0, - send_default_pii=send_default_pii, - ) - items = capture_items("span") + assert ( + tool_span["name"] + == "execute_tool test_decorator.test_span_templates_ai_objects..my_tool" + ) + assert tool_span["attributes"] == { + "gen_ai.tool.name": "test_decorator.test_span_templates_ai_objects..my_tool", + "gen_ai.tool.description": "This is a tool function.", + "gen_ai.operation.name": "execute_tool", + "gen_ai.usage.input_tokens": 10, + "gen_ai.usage.output_tokens": 20, + "gen_ai.usage.total_tokens": 30, + "sentry.environment": "production", + 
"sentry.op": "gen_ai.execute_tool", + "sentry.origin": "manual", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": mock.ANY, + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "test-transaction", + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } + + assert chat_span["name"] == "chat my-gpt-4o-mini" + assert chat_span["attributes"] == { + "gen_ai.operation.name": "chat", + "gen_ai.request.frequency_penalty": 1.0, + "gen_ai.request.max_tokens": 100, + "gen_ai.request.messages": "[{'role': 'user', 'content': 'What is the weather in Tokyo?'}, {'role': 'system', 'content': 'You are a helpful assistant that can answer questions about the weather.'}]", + "gen_ai.request.model": "my-gpt-4o-mini", + "gen_ai.request.presence_penalty": 2.0, + "gen_ai.request.temperature": 0.5, + "gen_ai.request.top_k": 40, + "gen_ai.request.top_p": 0.9, + "gen_ai.response.model": "my-gpt-4o-mini-v123", + "gen_ai.usage.input_tokens": 11, + "gen_ai.usage.output_tokens": 22, + "gen_ai.usage.total_tokens": 33, + "sentry.environment": "production", + "sentry.op": "gen_ai.chat", + "sentry.origin": "manual", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": mock.ANY, + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "test-transaction", + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } + else: + events = capture_events() + + with sentry_sdk.start_transaction(name="test-transaction"): + my_agent() + + (event,) = events + (agent_span, tool_span, chat_span) = event["spans"] + + assert agent_span["op"] == "gen_ai.invoke_agent" + assert ( + agent_span["description"] + == "invoke_agent test_decorator.test_span_templates_ai_objects..my_agent" + ) + assert agent_span["data"] == { + "gen_ai.agent.name": "test_decorator.test_span_templates_ai_objects..my_agent", + "gen_ai.operation.name": "invoke_agent", + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } + + assert tool_span["op"] == 
"gen_ai.execute_tool" + assert ( + tool_span["description"] + == "execute_tool test_decorator.test_span_templates_ai_objects..my_tool" + ) + assert tool_span["data"] == { + "gen_ai.tool.name": "test_decorator.test_span_templates_ai_objects..my_tool", + "gen_ai.tool.description": "This is a tool function.", + "gen_ai.operation.name": "execute_tool", + "gen_ai.usage.input_tokens": 10, + "gen_ai.usage.output_tokens": 20, + "gen_ai.usage.total_tokens": 30, + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } + + assert chat_span["op"] == "gen_ai.chat" + assert chat_span["description"] == "chat my-gpt-4o-mini" + assert chat_span["data"] == { + "gen_ai.operation.name": "chat", + "gen_ai.request.frequency_penalty": 1.0, + "gen_ai.request.max_tokens": 100, + "gen_ai.request.messages": "[{'role': 'user', 'content': 'What is the weather in Tokyo?'}, {'role': 'system', 'content': 'You are a helpful assistant that can answer questions about the weather.'}]", + "gen_ai.request.model": "my-gpt-4o-mini", + "gen_ai.request.presence_penalty": 2.0, + "gen_ai.request.temperature": 0.5, + "gen_ai.request.top_k": 40, + "gen_ai.request.top_p": 0.9, + "gen_ai.response.model": "my-gpt-4o-mini-v123", + "gen_ai.usage.input_tokens": 11, + "gen_ai.usage.output_tokens": 22, + "gen_ai.usage.total_tokens": 33, + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +@pytest.mark.parametrize("send_default_pii", [True, False]) +def test_span_templates_ai_pii( + sentry_init, + capture_events, + capture_items, + send_default_pii, + stream_gen_ai_spans, +): @sentry_sdk.trace(template=SPANTEMPLATE.AI_TOOL) def my_tool(arg1, arg2, **kwargs): """This is a tool function.""" @@ -400,17 +533,44 @@ def my_agent(*args, **kwargs): ) return "agent_output" - with sentry_sdk.start_transaction(name="test-transaction"): - my_agent(22, 33, arg1=44, arg2=55) + sentry_init( + traces_sample_rate=1.0, + send_default_pii=send_default_pii, + 
_experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) + + if stream_gen_ai_spans: + items = capture_items("span") - (_, tool_span, _) = (item.payload for item in items if item.type == "span") + with sentry_sdk.start_transaction(name="test-transaction"): + my_agent(22, 33, arg1=44, arg2=55) - if send_default_pii: - assert ( - tool_span["attributes"]["gen_ai.tool.input"] - == "{'args': (1, 2), 'kwargs': {'tool_arg1': '3', 'tool_arg2': '4'}}" - ) - assert tool_span["attributes"]["gen_ai.tool.output"] == "'tool_output'" + (_, tool_span, _) = (item.payload for item in items if item.type == "span") + + if send_default_pii: + assert ( + tool_span["attributes"]["gen_ai.tool.input"] + == "{'args': (1, 2), 'kwargs': {'tool_arg1': '3', 'tool_arg2': '4'}}" + ) + assert tool_span["attributes"]["gen_ai.tool.output"] == "'tool_output'" + else: + assert "gen_ai.tool.input" not in tool_span["attributes"] + assert "gen_ai.tool.output" not in tool_span["attributes"] else: - assert "gen_ai.tool.input" not in tool_span["attributes"] - assert "gen_ai.tool.output" not in tool_span["attributes"] + events = capture_events() + + with sentry_sdk.start_transaction(name="test-transaction"): + my_agent(22, 33, arg1=44, arg2=55) + + (event,) = events + (_, tool_span, _) = event["spans"] + + if send_default_pii: + assert ( + tool_span["data"]["gen_ai.tool.input"] + == "{'args': (1, 2), 'kwargs': {'tool_arg1': '3', 'tool_arg2': '4'}}" + ) + assert tool_span["data"]["gen_ai.tool.output"] == "'tool_output'" + else: + assert "gen_ai.tool.input" not in tool_span["data"] + assert "gen_ai.tool.output" not in tool_span["data"] diff --git a/tests/tracing/test_misc.py b/tests/tracing/test_misc.py index 85dfd6a302..1119f42461 100644 --- a/tests/tracing/test_misc.py +++ b/tests/tracing/test_misc.py @@ -646,24 +646,41 @@ def test_conversation_id_propagates_to_span_with_ai_op( span_data = event["spans"][0]["data"] assert span_data.get("gen_ai.conversation.id") == "conv-ai-op-test" + 
@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_conversation_id_propagates_to_span_with_gen_ai_op( - self, sentry_init, capture_items + self, sentry_init, capture_events, capture_items, stream_gen_ai_spans ): """Span with gen_ai.* op should get conversation_id.""" sentry_init( traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") - scope = sentry_sdk.get_current_scope() - scope.set_conversation_id("conv-gen-ai-op-test") + if stream_gen_ai_spans: + items = capture_items("span") - with sentry_sdk.start_transaction(name="test-tx"): - with start_span(op="gen_ai.invoke_agent"): - pass + scope = sentry_sdk.get_current_scope() + scope.set_conversation_id("conv-gen-ai-op-test") + + with sentry_sdk.start_transaction(name="test-tx"): + with start_span(op="gen_ai.invoke_agent"): + pass + + spans = [item.payload for item in items if item.type == "span"] + span_data = spans[0]["attributes"] + else: + events = capture_events() + + scope = sentry_sdk.get_current_scope() + scope.set_conversation_id("conv-gen-ai-op-test") + + with sentry_sdk.start_transaction(name="test-tx"): + with start_span(op="gen_ai.invoke_agent"): + pass + + (event,) = events + span_data = event["spans"][0]["data"] - spans = [item.payload for item in items if item.type == "span"] - span_data = spans[0]["attributes"] assert span_data.get("gen_ai.conversation.id") == "conv-gen-ai-op-test" def test_conversation_id_not_propagated_to_non_ai_span( From a2adf96fd93c40536f91266447b13fa477437dd2 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 12 May 2026 15:36:25 +0200 Subject: [PATCH 55/84] fix tests --- tests/tracing/test_decorator.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/tracing/test_decorator.py b/tests/tracing/test_decorator.py index 30c14b8ea6..8d7c97fdbf 100644 --- a/tests/tracing/test_decorator.py +++ b/tests/tracing/test_decorator.py @@ -359,11 +359,12 @@ def 
my_agent(): presence_penalty=2.0, ) + sentry_init( + traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) + if stream_gen_ai_spans: - sentry_init( - traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, - ) items = capture_items("span") with sentry_sdk.start_transaction(name="test-transaction"): From 401109aeb1f2b9d12431d7becabab4341b91f9b8 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 12 May 2026 15:55:30 +0200 Subject: [PATCH 56/84] feat: Remove truncation when stream_gen_ai_spans is enabled --- sentry_sdk/ai/utils.py | 8 + .../integrations/anthropic/test_anthropic.py | 329 +++------- .../google_genai/test_google_genai.py | 166 ++--- .../integrations/langchain/test_langchain.py | 137 ++--- .../integrations/langgraph/test_langgraph.py | 251 ++------ tests/integrations/litellm/test_litellm.py | 104 +--- tests/integrations/openai/test_openai.py | 60 +- .../openai_agents/test_openai_agents.py | 571 ++++++------------ 8 files changed, 439 insertions(+), 1187 deletions(-) diff --git a/sentry_sdk/ai/utils.py b/sentry_sdk/ai/utils.py index 8efa077ce5..4bd65ced76 100644 --- a/sentry_sdk/ai/utils.py +++ b/sentry_sdk/ai/utils.py @@ -741,6 +741,10 @@ def truncate_and_annotate_messages( scope: "Any", max_single_message_chars: int = MAX_SINGLE_MESSAGE_CONTENT_CHARS, ) -> "Optional[List[Dict[str, Any]]]": + client = sentry_sdk.get_client() + if client.options["_experiments"].get("stream_gen_ai_spans", False): + return messages + if not messages: return None @@ -761,6 +765,10 @@ def truncate_and_annotate_embedding_inputs( scope: "Any", max_bytes: int = MAX_GEN_AI_MESSAGE_BYTES, ) -> "Optional[List[Dict[str, Any]]]": + client = sentry_sdk.get_client() + if client.options["_experiments"].get("stream_gen_ai_spans", False): + return messages + if not messages: return None diff --git a/tests/integrations/anthropic/test_anthropic.py b/tests/integrations/anthropic/test_anthropic.py index 
2e240b9c8f..02de047711 100644 --- a/tests/integrations/anthropic/test_anthropic.py +++ b/tests/integrations/anthropic/test_anthropic.py @@ -3625,20 +3625,14 @@ def mock_messages_create(*args, **kwargs): assert stored_messages[0]["role"] == expected_role -@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) -def test_anthropic_message_truncation( - sentry_init, - capture_events, - capture_items, - stream_gen_ai_spans, -): +def test_anthropic_message_truncation(sentry_init, capture_events): """Test that large messages are truncated properly in Anthropic integration.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) + events = capture_events() client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -3654,83 +3648,42 @@ def test_anthropic_message_truncation( {"role": "user", "content": "small message 5"}, ] - if stream_gen_ai_spans: - items = capture_items("transaction", "span") - - with start_transaction(): - client.messages.create(max_tokens=1024, messages=messages, model="model") - - spans = [item.payload for item in items if item.type == "span"] - chat_spans = [ - span - for span in spans - if span["attributes"].get("sentry.op") == OP.GEN_AI_CHAT - ] - - assert len(chat_spans) > 0 - - chat_span = chat_spans[0] - assert chat_span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert chat_span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["attributes"] - - messages_data = chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - - assert isinstance(messages_data, str) - - parsed_messages = json.loads(messages_data) - assert isinstance(parsed_messages, list) - assert len(parsed_messages) == 1 - assert "small message 5" in str(parsed_messages[0]) - - tx = next(item.payload for item in items if item.type == 
"transaction") - else: - events = capture_events() - - with start_transaction(): - client.messages.create(max_tokens=1024, messages=messages, model="model") - - assert len(events) > 0 - tx = events[0] - assert tx["type"] == "transaction" + with start_transaction(): + client.messages.create(max_tokens=1024, messages=messages, model="model") - chat_spans = [ - span for span in tx.get("spans", []) if span.get("op") == OP.GEN_AI_CHAT - ] + assert len(events) > 0 + tx = events[0] + assert tx["type"] == "transaction" - assert len(chat_spans) > 0 + chat_spans = [ + span for span in tx.get("spans", []) if span.get("op") == OP.GEN_AI_CHAT + ] + assert len(chat_spans) > 0 - chat_span = chat_spans[0] - assert chat_span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert chat_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["data"] + chat_span = chat_spans[0] + assert chat_span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert chat_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["data"] - messages_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert isinstance(messages_data, str) + messages_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert isinstance(messages_data, str) - parsed_messages = json.loads(messages_data) - assert isinstance(parsed_messages, list) - assert len(parsed_messages) == 1 - assert "small message 5" in str(parsed_messages[0]) + parsed_messages = json.loads(messages_data) + assert isinstance(parsed_messages, list) + assert len(parsed_messages) == 1 + assert "small message 5" in str(parsed_messages[0]) assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5 -@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) -@pytest.mark.asyncio -async def test_anthropic_message_truncation_async( - sentry_init, - capture_events, - capture_items, - stream_gen_ai_spans, -): 
+async def test_anthropic_message_truncation_async(sentry_init, capture_events): """Test that large messages are truncated properly in Anthropic integration.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) + events = capture_events() client = AsyncAnthropic(api_key="z") client.messages._post = mock.AsyncMock(return_value=EXAMPLE_MESSAGE) @@ -3746,70 +3699,30 @@ async def test_anthropic_message_truncation_async( {"role": "user", "content": "small message 5"}, ] - if stream_gen_ai_spans: - items = capture_items("transaction", "span") - - with start_transaction(): - await client.messages.create( - max_tokens=1024, messages=messages, model="model" - ) - - spans = [item.payload for item in items if item.type == "span"] - chat_spans = [ - span - for span in spans - if span["attributes"].get("sentry.op") == OP.GEN_AI_CHAT - ] - - assert len(chat_spans) > 0 - - chat_span = chat_spans[0] + with start_transaction(): + await client.messages.create(max_tokens=1024, messages=messages, model="model") - assert chat_span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert chat_span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["attributes"] + assert len(events) > 0 + tx = events[0] + assert tx["type"] == "transaction" - messages_data = chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - - assert isinstance(messages_data, str) - - parsed_messages = json.loads(messages_data) - assert isinstance(parsed_messages, list) - assert len(parsed_messages) == 1 - assert "small message 5" in str(parsed_messages[0]) - - tx = next(item.payload for item in items if item.type == "transaction") - else: - events = capture_events() - - with start_transaction(): - await client.messages.create( - max_tokens=1024, messages=messages, model="model" - ) - - assert len(events) > 0 - tx = 
events[0] - assert tx["type"] == "transaction" - - chat_spans = [ - span for span in tx.get("spans", []) if span.get("op") == OP.GEN_AI_CHAT - ] - - assert len(chat_spans) > 0 - - chat_span = chat_spans[0] + chat_spans = [ + span for span in tx.get("spans", []) if span.get("op") == OP.GEN_AI_CHAT + ] + assert len(chat_spans) > 0 - assert chat_span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert chat_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["data"] + chat_span = chat_spans[0] + assert chat_span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert chat_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["data"] - messages_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert isinstance(messages_data, str) + messages_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert isinstance(messages_data, str) - parsed_messages = json.loads(messages_data) - assert isinstance(parsed_messages, list) - assert len(parsed_messages) == 1 - assert "small message 5" in str(parsed_messages[0]) + parsed_messages = json.loads(messages_data) + assert isinstance(parsed_messages, list) + assert len(parsed_messages) == 1 + assert "small message 5" in str(parsed_messages[0]) assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5 @@ -5203,21 +5116,14 @@ def test_transform_message_content_list_anthropic(): # Integration tests for binary data in messages -@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) -def test_message_with_base64_image( - sentry_init, - capture_events, - capture_items, - stream_gen_ai_spans, -): +def test_message_with_base64_image(sentry_init, capture_events): """Test that messages with base64 images are properly captured.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - 
_experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - + events = capture_events() client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -5238,31 +5144,15 @@ def test_message_with_base64_image( } ] - if stream_gen_ai_spans: - items = capture_items("transaction", "span") - - with start_transaction(name="anthropic"): - client.messages.create(max_tokens=1024, messages=messages, model="model") - - spans = [item.payload for item in items if item.type == "span"] - (span,) = spans - - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] - stored_messages = json.loads( - span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - ) - else: - events = capture_events() + with start_transaction(name="anthropic"): + client.messages.create(max_tokens=1024, messages=messages, model="model") - with start_transaction(name="anthropic"): - client.messages.create(max_tokens=1024, messages=messages, model="model") + assert len(events) == 1 + (event,) = events + (span,) = event["spans"] - assert len(events) == 1 - (event,) = events - (span,) = event["spans"] - - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] - stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] + stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(stored_messages) == 1 assert stored_messages[0]["role"] == "user" @@ -5412,21 +5302,14 @@ def test_message_with_file_image( } -@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) -def test_message_with_base64_pdf( - sentry_init, - capture_events, - capture_items, - stream_gen_ai_spans, -): +def test_message_with_base64_pdf(sentry_init, capture_events): """Test that messages with base64-encoded PDF documents are properly captured.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - 
_experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - + events = capture_events() client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -5447,30 +5330,14 @@ def test_message_with_base64_pdf( } ] - if stream_gen_ai_spans: - items = capture_items("transaction", "span") - - with start_transaction(name="anthropic"): - client.messages.create(max_tokens=1024, messages=messages, model="model") - - spans = [item.payload for item in items if item.type == "span"] - (span,) = spans - - stored_messages = json.loads( - span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - ) - else: - events = capture_events() - - with start_transaction(name="anthropic"): - client.messages.create(max_tokens=1024, messages=messages, model="model") - - assert len(events) == 1 - (event,) = events - (span,) = event["spans"] + with start_transaction(name="anthropic"): + client.messages.create(max_tokens=1024, messages=messages, model="model") - stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + assert len(events) == 1 + (event,) = events + (span,) = event["spans"] + stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) content = stored_messages[0]["content"] assert content[1] == { "type": "blob", @@ -5615,21 +5482,14 @@ def test_message_with_file_document( } -@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) -def test_message_with_mixed_content( - sentry_init, - capture_events, - capture_items, - stream_gen_ai_spans, -): +def test_message_with_mixed_content(sentry_init, capture_events): """Test that messages with mixed content (text, images, documents) are properly captured.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - + events = capture_events() client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ 
-5666,30 +5526,14 @@ def test_message_with_mixed_content( } ] - if stream_gen_ai_spans: - items = capture_items("transaction", "span") + with start_transaction(name="anthropic"): + client.messages.create(max_tokens=1024, messages=messages, model="model") - with start_transaction(name="anthropic"): - client.messages.create(max_tokens=1024, messages=messages, model="model") - - spans = [item.payload for item in items if item.type == "span"] - (span,) = spans - - stored_messages = json.loads( - span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - ) - else: - events = capture_events() - - with start_transaction(name="anthropic"): - client.messages.create(max_tokens=1024, messages=messages, model="model") - - assert len(events) == 1 - (event,) = events - (span,) = event["spans"] - - stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + assert len(events) == 1 + (event,) = events + (span,) = event["spans"] + stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) content = stored_messages[0]["content"] assert len(content) == 5 @@ -5721,21 +5565,14 @@ def test_message_with_mixed_content( } -@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) -def test_message_with_multiple_images_different_formats( - sentry_init, - capture_events, - capture_items, - stream_gen_ai_spans, -): +def test_message_with_multiple_images_different_formats(sentry_init, capture_events): """Test that messages with multiple images of different source types are handled.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - + events = capture_events() client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -5771,30 +5608,14 @@ def test_message_with_multiple_images_different_formats( } ] - if stream_gen_ai_spans: - items = capture_items("transaction", "span") - - with 
start_transaction(name="anthropic"): - client.messages.create(max_tokens=1024, messages=messages, model="model") - - spans = [item.payload for item in items if item.type == "span"] - (span,) = spans - - stored_messages = json.loads( - span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - ) - else: - events = capture_events() - - with start_transaction(name="anthropic"): - client.messages.create(max_tokens=1024, messages=messages, model="model") + with start_transaction(name="anthropic"): + client.messages.create(max_tokens=1024, messages=messages, model="model") - assert len(events) == 1 - (event,) = events - (span,) = event["spans"] - - stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + assert len(events) == 1 + (event,) = events + (span,) = event["spans"] + stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) content = stored_messages[0]["content"] assert len(content) == 4 diff --git a/tests/integrations/google_genai/test_google_genai.py b/tests/integrations/google_genai/test_google_genai.py index 8da5e7ca22..d9b2736584 100644 --- a/tests/integrations/google_genai/test_google_genai.py +++ b/tests/integrations/google_genai/test_google_genai.py @@ -1401,21 +1401,16 @@ def test_tool_calls_extraction( assert json.loads(tool_calls[1]["arguments"]) == {"timezone": "PST"} -@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_google_genai_message_truncation( - sentry_init, - capture_events, - capture_items, - mock_genai_client, - stream_gen_ai_spans, + sentry_init, capture_events, mock_genai_client ): """Test that large messages are truncated properly in Google GenAI integration.""" sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) + events = capture_events() large_content = ( "This is a very long message that will exceed our size limits. 
" * 1000 @@ -1424,39 +1419,21 @@ def test_google_genai_message_truncation( mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) - if stream_gen_ai_spans: - items = capture_items("span") - - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ), start_transaction(name="google_genai"): - mock_genai_client.models.generate_content( - model="gemini-1.5-flash", - contents=[large_content, small_content], - config=create_test_config(), - ) - - invoke_span = next(item.payload for item in items if item.type == "span") - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["attributes"] - - messages_data = invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - else: - events = capture_events() - - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ), start_transaction(name="google_genai"): + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ): + with start_transaction(name="google_genai"): mock_genai_client.models.generate_content( model="gemini-1.5-flash", contents=[large_content, small_content], config=create_test_config(), ) - (event,) = events - invoke_span = event["spans"][0] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["data"] + (event,) = events + invoke_span = event["spans"][0] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["data"] - messages_data = invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + messages_data = invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] assert isinstance(messages_data, str) parsed_messages = json.loads(messages_data) @@ -2543,21 +2520,16 @@ def test_generate_content_with_inline_data( assert messages[0]["content"][1]["content"] == BLOB_DATA_SUBSTITUTE -@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_generate_content_with_function_response( - sentry_init, - capture_events, - capture_items, - 
mock_genai_client, - stream_gen_ai_spans, + sentry_init, capture_events, mock_genai_client ): """Test generate_content with function_response (tool result).""" sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) + events = capture_events() mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -2583,36 +2555,18 @@ def test_generate_content_with_function_response( ), ] - if stream_gen_ai_spans: - items = capture_items("span") - - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ), start_transaction(name="google_genai"): + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ): + with start_transaction(name="google_genai"): mock_genai_client.models.generate_content( model="gemini-1.5-flash", contents=contents, config=create_test_config() ) - invoke_span = next(item.payload for item in items if item.type == "span") - - messages = json.loads( - invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - ) - else: - events = capture_events() - - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ), start_transaction(name="google_genai"): - mock_genai_client.models.generate_content( - model="gemini-1.5-flash", contents=contents, config=create_test_config() - ) - - (event,) = events - invoke_span = event["spans"][0] - - messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + (event,) = events + invoke_span = event["spans"][0] + messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(messages) == 1 # First message is user message assert messages[0]["role"] == "tool" @@ -2621,21 +2575,16 @@ def test_generate_content_with_function_response( assert messages[0]["content"]["output"] == "Sunny, 72F" 
-@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_generate_content_with_mixed_string_and_content( - sentry_init, - capture_events, - capture_items, - mock_genai_client, - stream_gen_ai_spans, + sentry_init, capture_events, mock_genai_client ): """Test generate_content with mixed string and Content objects in list.""" sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) + events = capture_events() mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -2652,36 +2601,18 @@ def test_generate_content_with_mixed_string_and_content( ), ] - if stream_gen_ai_spans: - items = capture_items("span") - - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ), start_transaction(name="google_genai"): + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ): + with start_transaction(name="google_genai"): mock_genai_client.models.generate_content( model="gemini-1.5-flash", contents=contents, config=create_test_config() ) - invoke_span = next(item.payload for item in items if item.type == "span") - - messages = json.loads( - invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - ) - else: - events = capture_events() - - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ), start_transaction(name="google_genai"): - mock_genai_client.models.generate_content( - model="gemini-1.5-flash", contents=contents, config=create_test_config() - ) - - (event,) = events - invoke_span = event["spans"][0] - - messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + (event,) = events + invoke_span = event["spans"][0] + messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(messages) == 1 # User message assert 
messages[0]["role"] == "user" @@ -2744,13 +2675,8 @@ def test_generate_content_with_part_object_directly( assert messages[0]["content"] == [{"text": "Direct Part object", "type": "text"}] -@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_generate_content_with_list_of_dicts( - sentry_init, - capture_events, - capture_items, - mock_genai_client, - stream_gen_ai_spans, + sentry_init, capture_events, mock_genai_client ): """ Test generate_content with list of dict format inputs. @@ -2763,8 +2689,8 @@ def test_generate_content_with_list_of_dicts( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) + events = capture_events() mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -2775,36 +2701,18 @@ def test_generate_content_with_list_of_dicts( {"role": "user", "parts": [{"text": "Second user message"}]}, ] - if stream_gen_ai_spans: - items = capture_items("span") - - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ), start_transaction(name="google_genai"): + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ): + with start_transaction(name="google_genai"): mock_genai_client.models.generate_content( model="gemini-1.5-flash", contents=contents, config=create_test_config() ) - invoke_span = next(item.payload for item in items if item.type == "span") - - messages = json.loads( - invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - ) - else: - events = capture_events() - - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ), start_transaction(name="google_genai"): - mock_genai_client.models.generate_content( - model="gemini-1.5-flash", contents=contents, config=create_test_config() - ) - - (event,) = events - invoke_span = event["spans"][0] - - 
messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + (event,) = events + invoke_span = event["spans"][0] + messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(messages) == 1 assert messages[0]["role"] == "user" assert messages[0]["content"] == [{"text": "Second user message", "type": "text"}] diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index 7adb2d13c5..414eb67b3e 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -2949,13 +2949,7 @@ def test_langchain_message_role_normalization_units(): assert normalized[5] == "string message" # String message unchanged -@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) -def test_langchain_message_truncation( - sentry_init, - capture_events, - capture_items, - stream_gen_ai_spans, -): +def test_langchain_message_truncation(sentry_init, capture_events): """Test that large messages are truncated properly in Langchain integration.""" from langchain_core.outputs import LLMResult, Generation @@ -2963,8 +2957,8 @@ def test_langchain_message_truncation( integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) + events = capture_events() callback = SentryLangchainCallback(max_span_map_size=100, include_prompts=True) @@ -2982,101 +2976,48 @@ def test_langchain_message_truncation( "small message 5", ] - if stream_gen_ai_spans: - items = capture_items("transaction", "span") - - with start_transaction(): - callback.on_llm_start( - serialized=serialized, - prompts=prompts, - run_id=run_id, - name="my_pipeline", - invocation_params={ - "temperature": 0.7, - "max_tokens": 100, - "model": "gpt-3.5-turbo", - }, - ) - - response = LLMResult( - generations=[[Generation(text="The response")]], - llm_output={ - "token_usage": { - 
"total_tokens": 25, - "prompt_tokens": 10, - "completion_tokens": 15, - } - }, - ) - callback.on_llm_end(response=response, run_id=run_id) - - tx = next(item.payload for item in items if item.type == "transaction") - assert tx["type"] == "transaction" - - spans = [item.payload for item in items if item.type == "span"] - llm_spans = [ - span - for span in spans - if span["attributes"].get("sentry.op") == "gen_ai.text_completion" - ] - - assert len(llm_spans) > 0 - - llm_span = llm_spans[0] - - assert llm_span["attributes"]["gen_ai.operation.name"] == "text_completion" - assert llm_span["attributes"][SPANDATA.GEN_AI_FUNCTION_ID] == "my_pipeline" - - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in llm_span["attributes"] - messages_data = llm_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - else: - events = capture_events() - - with start_transaction(): - callback.on_llm_start( - serialized=serialized, - prompts=prompts, - run_id=run_id, - name="my_pipeline", - invocation_params={ - "temperature": 0.7, - "max_tokens": 100, - "model": "gpt-3.5-turbo", - }, - ) - - response = LLMResult( - generations=[[Generation(text="The response")]], - llm_output={ - "token_usage": { - "total_tokens": 25, - "prompt_tokens": 10, - "completion_tokens": 15, - } - }, - ) - callback.on_llm_end(response=response, run_id=run_id) - - assert len(events) > 0 - tx = events[0] - assert tx["type"] == "transaction" - - llm_spans = [ - span - for span in tx.get("spans", []) - if span.get("op") == "gen_ai.text_completion" - ] + with start_transaction(): + callback.on_llm_start( + serialized=serialized, + prompts=prompts, + run_id=run_id, + name="my_pipeline", + invocation_params={ + "temperature": 0.7, + "max_tokens": 100, + "model": "gpt-3.5-turbo", + }, + ) - assert len(llm_spans) > 0 + response = LLMResult( + generations=[[Generation(text="The response")]], + llm_output={ + "token_usage": { + "total_tokens": 25, + "prompt_tokens": 10, + "completion_tokens": 15, + } + }, + ) + 
callback.on_llm_end(response=response, run_id=run_id) - llm_span = llm_spans[0] + assert len(events) > 0 + tx = events[0] + assert tx["type"] == "transaction" - assert llm_span["data"]["gen_ai.operation.name"] == "text_completion" - assert llm_span["data"][SPANDATA.GEN_AI_FUNCTION_ID] == "my_pipeline" + llm_spans = [ + span + for span in tx.get("spans", []) + if span.get("op") == "gen_ai.text_completion" + ] + assert len(llm_spans) > 0 - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in llm_span["data"] - messages_data = llm_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + llm_span = llm_spans[0] + assert llm_span["data"]["gen_ai.operation.name"] == "text_completion" + assert llm_span["data"][SPANDATA.GEN_AI_FUNCTION_ID] == "my_pipeline" + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in llm_span["data"] + messages_data = llm_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] assert isinstance(messages_data, str) parsed_messages = json.loads(messages_data) diff --git a/tests/integrations/langgraph/test_langgraph.py b/tests/integrations/langgraph/test_langgraph.py index 991c1f2269..c1e753716d 100644 --- a/tests/integrations/langgraph/test_langgraph.py +++ b/tests/integrations/langgraph/test_langgraph.py @@ -242,7 +242,6 @@ def original_compile(self, *args, **kwargs): assert "calculator" in tools_data -@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( "send_default_pii, include_prompts", [ @@ -252,21 +251,14 @@ def original_compile(self, *args, **kwargs): (False, False), ], ) -def test_pregel_invoke( - sentry_init, - capture_events, - capture_items, - send_default_pii, - include_prompts, - stream_gen_ai_spans, -): +def test_pregel_invoke(sentry_init, capture_events, send_default_pii, include_prompts): """Test Pregel.invoke() wrapper creates proper invoke_agent span.""" sentry_init( integrations=[LanggraphIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - 
_experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) + events = capture_events() test_state = { "messages": [ @@ -297,134 +289,57 @@ def original_invoke(self, *args, **kwargs): ] return {"messages": new_messages} - if stream_gen_ai_spans: - items = capture_items("transaction", "span") - - with start_transaction(): - wrapped_invoke = _wrap_pregel_invoke(original_invoke) - result = wrapped_invoke(pregel, test_state) - - assert result is not None + with start_transaction(): + wrapped_invoke = _wrap_pregel_invoke(original_invoke) + result = wrapped_invoke(pregel, test_state) - spans = [item.payload for item in items if item.type == "span"] - invoke_spans = [ - span - for span in spans - if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT - ] + assert result is not None - assert len(invoke_spans) == 1 + tx = events[0] + assert tx["type"] == "transaction" - invoke_span = invoke_spans[0] + invoke_spans = [ + span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT + ] + assert len(invoke_spans) == 1 - assert invoke_span["name"] == "invoke_agent test_graph" - assert invoke_span["attributes"]["sentry.origin"] == "auto.ai.langgraph" - assert ( - invoke_span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "invoke_agent" - ) - assert invoke_span["attributes"][SPANDATA.GEN_AI_PIPELINE_NAME] == "test_graph" - assert invoke_span["attributes"][SPANDATA.GEN_AI_AGENT_NAME] == "test_graph" + invoke_span = invoke_spans[0] + assert invoke_span["description"] == "invoke_agent test_graph" + assert invoke_span["origin"] == "auto.ai.langgraph" + assert invoke_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "invoke_agent" + assert invoke_span["data"][SPANDATA.GEN_AI_PIPELINE_NAME] == "test_graph" + assert invoke_span["data"][SPANDATA.GEN_AI_AGENT_NAME] == "test_graph" - if send_default_pii and include_prompts: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["attributes"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT in invoke_span["attributes"] + if 
send_default_pii and include_prompts: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT in invoke_span["data"] - request_messages = invoke_span["attributes"][ - SPANDATA.GEN_AI_REQUEST_MESSAGES - ] + request_messages = invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - if isinstance(request_messages, str): - import json + if isinstance(request_messages, str): + import json - request_messages = json.loads(request_messages) - assert len(request_messages) == 1 - assert request_messages[0]["content"] == "Of course! How can I assist you?" + request_messages = json.loads(request_messages) + assert len(request_messages) == 1 + assert request_messages[0]["content"] == "Of course! How can I assist you?" - response_text = invoke_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] - assert response_text == expected_assistant_response - - assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in invoke_span["attributes"] - tool_calls_data = invoke_span["attributes"][ - SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS - ] + response_text = invoke_span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + assert response_text == expected_assistant_response - if isinstance(tool_calls_data, str): - import json + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in invoke_span["data"] + tool_calls_data = invoke_span["data"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS] + if isinstance(tool_calls_data, str): + import json - tool_calls_data = json.loads(tool_calls_data) + tool_calls_data = json.loads(tool_calls_data) - assert len(tool_calls_data) == 1 - assert tool_calls_data[0]["id"] == "call_test_123" - assert tool_calls_data[0]["function"]["name"] == "search_tool" - else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in invoke_span.get( - "attributes", {} - ) - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in invoke_span.get( - "attributes", {} - ) - assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in invoke_span.get( - "attributes", {} - ) + assert len(tool_calls_data) == 1 + assert 
tool_calls_data[0]["id"] == "call_test_123" + assert tool_calls_data[0]["function"]["name"] == "search_tool" else: - events = capture_events() - - with start_transaction(): - wrapped_invoke = _wrap_pregel_invoke(original_invoke) - result = wrapped_invoke(pregel, test_state) - - assert result is not None - - tx = events[0] - assert tx["type"] == "transaction" - - invoke_spans = [ - span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT - ] - - assert len(invoke_spans) == 1 - - invoke_span = invoke_spans[0] - - assert invoke_span["description"] == "invoke_agent test_graph" - assert invoke_span["origin"] == "auto.ai.langgraph" - assert invoke_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "invoke_agent" - assert invoke_span["data"][SPANDATA.GEN_AI_PIPELINE_NAME] == "test_graph" - assert invoke_span["data"][SPANDATA.GEN_AI_AGENT_NAME] == "test_graph" - - if send_default_pii and include_prompts: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT in invoke_span["data"] - - request_messages = invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - - if isinstance(request_messages, str): - import json - - request_messages = json.loads(request_messages) - assert len(request_messages) == 1 - assert request_messages[0]["content"] == "Of course! How can I assist you?" 
- - response_text = invoke_span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] - assert response_text == expected_assistant_response - - assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in invoke_span["data"] - tool_calls_data = invoke_span["data"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS] - - if isinstance(tool_calls_data, str): - import json - - tool_calls_data = json.loads(tool_calls_data) - - assert len(tool_calls_data) == 1 - assert tool_calls_data[0]["id"] == "call_test_123" - assert tool_calls_data[0]["function"]["name"] == "search_tool" - else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in invoke_span.get("data", {}) - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in invoke_span.get("data", {}) - assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in invoke_span.get( - "data", {} - ) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in invoke_span.get("data", {}) + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in invoke_span.get("data", {}) + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in invoke_span.get("data", {}) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @@ -2022,13 +1937,7 @@ def __init__(self, content, message_type="human"): assert "ai" not in roles -@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) -def test_langgraph_message_truncation( - sentry_init, - capture_events, - capture_items, - stream_gen_ai_spans, -): +def test_langgraph_message_truncation(sentry_init, capture_events): """Test that large messages are truncated properly in Langgraph integration.""" import json @@ -2036,8 +1945,8 @@ def test_langgraph_message_truncation( integrations=[LanggraphIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) + events = capture_events() large_content = ( "This is a very long message that will exceed our size limits. 
" * 1000 @@ -2057,66 +1966,28 @@ def test_langgraph_message_truncation( def original_invoke(self, *args, **kwargs): return {"messages": args[0].get("messages", [])} - if stream_gen_ai_spans: - items = capture_items("transaction", "span") - - with start_transaction(): - wrapped_invoke = _wrap_pregel_invoke(original_invoke) - result = wrapped_invoke(pregel, test_state) - - assert result is not None - - spans = [item.payload for item in items if item.type == "span"] - invoke_spans = [ - span - for span in spans - if span["attributes"].get("sentry.op") == OP.GEN_AI_INVOKE_AGENT - ] - - assert len(invoke_spans) > 0 + with start_transaction(): + wrapped_invoke = _wrap_pregel_invoke(original_invoke) + result = wrapped_invoke(pregel, test_state) - invoke_span = invoke_spans[0] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["attributes"] - - messages_data = invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - - assert isinstance(messages_data, str) - - parsed_messages = json.loads(messages_data) - assert isinstance(parsed_messages, list) - assert len(parsed_messages) == 1 - assert "small message 5" in str(parsed_messages[0]) - (tx,) = (item.payload for item in items if item.type == "transaction") - else: - events = capture_events() - - with start_transaction(): - wrapped_invoke = _wrap_pregel_invoke(original_invoke) - result = wrapped_invoke(pregel, test_state) - - assert result is not None - - assert len(events) > 0 - tx = events[0] - assert tx["type"] == "transaction" - - invoke_spans = [ - span - for span in tx.get("spans", []) - if span.get("op") == OP.GEN_AI_INVOKE_AGENT - ] - - assert len(invoke_spans) > 0 + assert result is not None + assert len(events) > 0 + tx = events[0] + assert tx["type"] == "transaction" - invoke_span = invoke_spans[0] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["data"] + invoke_spans = [ + span for span in tx.get("spans", []) if span.get("op") == OP.GEN_AI_INVOKE_AGENT + ] + assert len(invoke_spans) > 0 - 
messages_data = invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert isinstance(messages_data, str) + invoke_span = invoke_spans[0] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["data"] - parsed_messages = json.loads(messages_data) - assert isinstance(parsed_messages, list) - assert len(parsed_messages) == 1 - assert "small message 5" in str(parsed_messages[0]) + messages_data = invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert isinstance(messages_data, str) + parsed_messages = json.loads(messages_data) + assert isinstance(parsed_messages, list) + assert len(parsed_messages) == 1 + assert "small message 5" in str(parsed_messages[0]) assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5 diff --git a/tests/integrations/litellm/test_litellm.py b/tests/integrations/litellm/test_litellm.py index 8ae8dca99e..404cdeb9c4 100644 --- a/tests/integrations/litellm/test_litellm.py +++ b/tests/integrations/litellm/test_litellm.py @@ -2341,20 +2341,14 @@ def test_integration_setup(sentry_init): assert _failure_callback in (litellm.failure_callback or []) -@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) -def test_litellm_message_truncation( - sentry_init, - capture_events, - capture_items, - stream_gen_ai_spans, -): +def test_litellm_message_truncation(sentry_init, capture_events): """Test that large messages are truncated properly in LiteLLM integration.""" sentry_init( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) + events = capture_events() large_content = ( "This is a very long message that will exceed our size limits. 
" * 1000 @@ -2368,79 +2362,39 @@ def test_litellm_message_truncation( ] mock_response = MockCompletionResponse() - if stream_gen_ai_spans: - items = capture_items("transaction", "span") - - with start_transaction(name="litellm test"): - kwargs = { - "model": "gpt-3.5-turbo", - "messages": messages, - } - - _input_callback(kwargs) - _success_callback( - kwargs, - mock_response, - datetime.now(), - datetime.now(), - ) - - spans = [item.payload for item in items if item.type == "span"] - chat_spans = [ - span - for span in spans - if span["attributes"].get("sentry.op") == OP.GEN_AI_CHAT - ] - - assert len(chat_spans) > 0 - - chat_span = chat_spans[0] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["attributes"] - - messages_data = chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert isinstance(messages_data, str) - - parsed_messages = json.loads(messages_data) - assert isinstance(parsed_messages, list) - assert len(parsed_messages) == 1 - assert "small message 5" in str(parsed_messages[0]) - tx = next(item.payload for item in items if item.type == "transaction") - else: - events = capture_events() - - with start_transaction(name="litellm test"): - kwargs = { - "model": "gpt-3.5-turbo", - "messages": messages, - } + with start_transaction(name="litellm test"): + kwargs = { + "model": "gpt-3.5-turbo", + "messages": messages, + } - _input_callback(kwargs) - _success_callback( - kwargs, - mock_response, - datetime.now(), - datetime.now(), - ) + _input_callback(kwargs) + _success_callback( + kwargs, + mock_response, + datetime.now(), + datetime.now(), + ) - assert len(events) > 0 - tx = events[0] - assert tx["type"] == "transaction" + assert len(events) > 0 + tx = events[0] + assert tx["type"] == "transaction" - chat_spans = [ - span for span in tx.get("spans", []) if span.get("op") == OP.GEN_AI_CHAT - ] - assert len(chat_spans) > 0 + chat_spans = [ + span for span in tx.get("spans", []) if span.get("op") == OP.GEN_AI_CHAT + ] + assert len(chat_spans) 
> 0 - chat_span = chat_spans[0] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["data"] + chat_span = chat_spans[0] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["data"] - messages_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert isinstance(messages_data, str) + messages_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert isinstance(messages_data, str) - parsed_messages = json.loads(messages_data) - assert isinstance(parsed_messages, list) - assert len(parsed_messages) == 1 - assert "small message 5" in str(parsed_messages[0]) + parsed_messages = json.loads(messages_data) + assert isinstance(parsed_messages, list) + assert len(parsed_messages) == 1 + assert "small message 5" in str(parsed_messages[0]) assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5 diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index 0da39e842d..56ac885619 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -5720,21 +5720,16 @@ def test_openai_message_role_mapping( assert stored_messages[0]["role"] == expected_role -@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_openai_message_truncation( - sentry_init, - capture_events, - capture_items, - nonstreaming_chat_completions_model_response, - stream_gen_ai_spans, + sentry_init, capture_events, nonstreaming_chat_completions_model_response ): """Test that large messages are truncated properly in OpenAI integration.""" sentry_init( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) + events = capture_events() client = OpenAI(api_key="z") client.chat.completions._post = mock.Mock( @@ -5761,47 +5756,22 @@ def test_openai_message_truncation( {"role": "user", "content": large_content}, ] - if stream_gen_ai_spans: - items = 
capture_items("transaction", "span") - - with start_transaction(name="openai tx"): - client.chat.completions.create( - model="some-model", - messages=large_messages, - ) - - span = next(item.payload for item in items if item.type == "span") - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] - - messages_data = span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - - assert isinstance(messages_data, str) - - parsed_messages = json.loads(messages_data) - assert isinstance(parsed_messages, list) - assert len(parsed_messages) <= len(large_messages) - - (event,) = (item.payload for item in items if item.type == "transaction") - else: - events = capture_events() - - with start_transaction(name="openai tx"): - client.chat.completions.create( - model="some-model", - messages=large_messages, - ) - - (event,) = events - span = event["spans"][0] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] + with start_transaction(name="openai tx"): + client.chat.completions.create( + model="some-model", + messages=large_messages, + ) - messages_data = span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + (event,) = events + span = event["spans"][0] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] - assert isinstance(messages_data, str) + messages_data = span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert isinstance(messages_data, str) - parsed_messages = json.loads(messages_data) - assert isinstance(parsed_messages, list) - assert len(parsed_messages) <= len(large_messages) + parsed_messages = json.loads(messages_data) + assert isinstance(parsed_messages, list) + assert len(parsed_messages) <= len(large_messages) meta_path = event["_meta"] span_meta = meta_path["spans"]["0"]["data"] diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py index 60f88cd7f4..f15bac5c64 100644 --- a/tests/integrations/openai_agents/test_openai_agents.py +++ 
b/tests/integrations/openai_agents/test_openai_agents.py @@ -1679,16 +1679,13 @@ async def test_max_turns_before_handoff_span( assert handoff_span["data"]["gen_ai.operation.name"] == "handoff" -@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_tool_execution_span( sentry_init, capture_events, - capture_items, test_agent, get_model_response, responses_tool_call_model_responses, - stream_gen_ai_spans, ): """ Test tool execution span creation. @@ -1746,413 +1743,195 @@ def simple_test_tool(message: str) -> str: serialize_pydantic=True, ) - if stream_gen_ai_spans: - with patch.object( - agent_with_tool.model._client._client, - "send", - side_effect=[tool_response, final_response], - ) as _: - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, - ) - - items = capture_items("transaction", "span") - - await agents.Runner.run( - agent_with_tool, - "Please use the simple test tool", - run_config=test_run_config, - ) - - (transaction,) = (item.payload for item in items if item.type == "transaction") - assert transaction["transaction"] == "test_agent workflow" - assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" - - spans = [item.payload for item in items if item.type == "span"] - agent_span = next( - span - for span in spans - if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT - ) - ai_client_span1, ai_client_span2 = ( - span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - ) - tool_span = next( - span - for span in spans - if span["attributes"]["sentry.op"] == OP.GEN_AI_EXECUTE_TOOL + with patch.object( + agent_with_tool.model._client._client, + "send", + side_effect=[tool_response, final_response], + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=True, ) - available_tool = { - "name": 
"simple_test_tool", - "description": "A simple tool", - "params_json_schema": { - "properties": {"message": {"title": "Message", "type": "string"}}, - "required": ["message"], - "title": "simple_test_tool_args", - "type": "object", - "additionalProperties": False, - }, - "on_invoke_tool": mock.ANY, - "strict_json_schema": True, - "is_enabled": True, - } - - if parse_version(OPENAI_AGENTS_VERSION) >= (0, 3, 3): - available_tool.update( - {"tool_input_guardrails": None, "tool_output_guardrails": None} - ) - - if parse_version(OPENAI_AGENTS_VERSION) >= ( - 0, - 8, - ): - available_tool["needs_approval"] = False - if parse_version(OPENAI_AGENTS_VERSION) >= ( - 0, - 9, - 0, - ): - available_tool.update( - { - "timeout_seconds": None, - "timeout_behavior": "error_as_result", - "timeout_error_function": None, - } - ) - - assert agent_span["name"] == "invoke_agent test_agent" - assert agent_span["attributes"]["sentry.origin"] == "auto.ai.openai_agents" - assert agent_span["attributes"]["gen_ai.agent.name"] == "test_agent" - assert agent_span["attributes"]["gen_ai.operation.name"] == "invoke_agent" - - agent_span_available_tool = json.loads( - agent_span["attributes"]["gen_ai.request.available_tools"] - )[0] - - assert all(agent_span_available_tool[k] == v for k, v in available_tool.items()) - - assert agent_span["attributes"]["gen_ai.request.max_tokens"] == 100 - assert agent_span["attributes"]["gen_ai.request.model"] == "gpt-4" - assert agent_span["attributes"]["gen_ai.request.temperature"] == 0.7 - assert agent_span["attributes"]["gen_ai.request.top_p"] == 1.0 - assert agent_span["attributes"]["gen_ai.system"] == "openai" - - assert ai_client_span1["name"] == "chat gpt-4" - assert ai_client_span1["attributes"]["gen_ai.operation.name"] == "chat" - assert ai_client_span1["attributes"]["gen_ai.system"] == "openai" - assert ai_client_span1["attributes"]["gen_ai.agent.name"] == "test_agent" - - ai_client_span1_available_tool = json.loads( - 
ai_client_span1["attributes"]["gen_ai.request.available_tools"] - )[0] + events = capture_events() - assert all( - ai_client_span1_available_tool[k] == v for k, v in available_tool.items() - ) - - assert ai_client_span1["attributes"]["gen_ai.request.max_tokens"] == 100 - assert ai_client_span1["attributes"][ - "gen_ai.request.messages" - ] == safe_serialize( - [ - { - "role": "user", - "content": [ - {"type": "text", "text": "Please use the simple test tool"} - ], - }, - ] - ) - assert ai_client_span1["attributes"]["gen_ai.request.model"] == "gpt-4" - assert ai_client_span1["attributes"]["gen_ai.request.temperature"] == 0.7 - assert ai_client_span1["attributes"]["gen_ai.request.top_p"] == 1.0 - assert ai_client_span1["attributes"]["gen_ai.usage.input_tokens"] == 10 - assert ai_client_span1["attributes"]["gen_ai.usage.input_tokens.cached"] == 0 - assert ai_client_span1["attributes"]["gen_ai.usage.output_tokens"] == 5 - assert ( - ai_client_span1["attributes"]["gen_ai.usage.output_tokens.reasoning"] == 0 - ) - assert ai_client_span1["attributes"]["gen_ai.usage.total_tokens"] == 15 - - tool_call = { - "arguments": '{"message": "hello"}', - "call_id": "call_123", - "name": "simple_test_tool", - "type": "function_call", - "id": "call_123", - "status": None, - } - - if OPENAI_VERSION >= (2, 25, 0): - tool_call["namespace"] = None - - assert json.loads( - ai_client_span1["attributes"]["gen_ai.response.tool_calls"] - ) == [tool_call] - - assert tool_span["name"] == "execute_tool simple_test_tool" - assert tool_span["attributes"]["gen_ai.agent.name"] == "test_agent" - assert tool_span["attributes"]["gen_ai.operation.name"] == "execute_tool" - - tool_span_available_tool = json.loads( - tool_span["attributes"]["gen_ai.request.available_tools"] - )[0] - - assert all(tool_span_available_tool[k] == v for k, v in available_tool.items()) - - assert tool_span["attributes"]["gen_ai.request.max_tokens"] == 100 - assert tool_span["attributes"]["gen_ai.request.model"] == "gpt-4" - 
assert tool_span["attributes"]["gen_ai.request.temperature"] == 0.7 - assert tool_span["attributes"]["gen_ai.request.top_p"] == 1.0 - assert tool_span["attributes"]["gen_ai.system"] == "openai" - assert tool_span["attributes"]["gen_ai.tool.description"] == "A simple tool" - assert tool_span["attributes"]["gen_ai.tool.input"] == '{"message": "hello"}' - assert tool_span["attributes"]["gen_ai.tool.name"] == "simple_test_tool" - assert ( - tool_span["attributes"]["gen_ai.tool.output"] == "Tool executed with: hello" + await agents.Runner.run( + agent_with_tool, + "Please use the simple test tool", + run_config=test_run_config, ) - assert ai_client_span2["name"] == "chat gpt-4" - assert ai_client_span2["attributes"]["gen_ai.agent.name"] == "test_agent" - assert ai_client_span2["attributes"]["gen_ai.operation.name"] == "chat" - ai_client_span2_available_tool = json.loads( - ai_client_span2["attributes"]["gen_ai.request.available_tools"] - )[0] - - assert all( - ai_client_span2_available_tool[k] == v for k, v in available_tool.items() + (transaction,) = events + spans = transaction["spans"] + agent_span = next(span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT) + ai_client_span1, ai_client_span2 = ( + span for span in spans if span["op"] == OP.GEN_AI_CHAT + ) + tool_span = next(span for span in spans if span["op"] == OP.GEN_AI_EXECUTE_TOOL) + + available_tool = { + "name": "simple_test_tool", + "description": "A simple tool", + "params_json_schema": { + "properties": {"message": {"title": "Message", "type": "string"}}, + "required": ["message"], + "title": "simple_test_tool_args", + "type": "object", + "additionalProperties": False, + }, + "on_invoke_tool": mock.ANY, + "strict_json_schema": True, + "is_enabled": True, + } + + if parse_version(OPENAI_AGENTS_VERSION) >= (0, 3, 3): + available_tool.update( + {"tool_input_guardrails": None, "tool_output_guardrails": None} ) - assert ai_client_span2["attributes"]["gen_ai.request.max_tokens"] == 100 - assert 
ai_client_span2["attributes"][ - "gen_ai.request.messages" - ] == safe_serialize( - [ - { - "role": "tool", - "content": [ - { - "call_id": "call_123", - "output": "Tool executed with: hello", - "type": "function_call_output", - } - ], - }, - ] - ) - assert ai_client_span2["attributes"]["gen_ai.request.model"] == "gpt-4" - assert ai_client_span2["attributes"]["gen_ai.request.temperature"] == 0.7 - assert ai_client_span2["attributes"]["gen_ai.request.top_p"] == 1.0 - assert ( - ai_client_span2["attributes"]["gen_ai.response.text"] - == "Task completed using the tool" - ) - assert ai_client_span2["attributes"]["gen_ai.system"] == "openai" - assert ai_client_span2["attributes"]["gen_ai.usage.input_tokens.cached"] == 0 - assert ai_client_span2["attributes"]["gen_ai.usage.input_tokens"] == 15 - assert ( - ai_client_span2["attributes"]["gen_ai.usage.output_tokens.reasoning"] == 0 + if parse_version(OPENAI_AGENTS_VERSION) >= ( + 0, + 8, + ): + available_tool["needs_approval"] = False + if parse_version(OPENAI_AGENTS_VERSION) >= ( + 0, + 9, + 0, + ): + available_tool.update( + { + "timeout_seconds": None, + "timeout_behavior": "error_as_result", + "timeout_error_function": None, + } ) - assert ai_client_span2["attributes"]["gen_ai.usage.output_tokens"] == 10 - assert ai_client_span2["attributes"]["gen_ai.usage.total_tokens"] == 25 - else: - with patch.object( - agent_with_tool.model._client._client, - "send", - side_effect=[tool_response, final_response], - ) as _: - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, - ) - - events = capture_events() - - await agents.Runner.run( - agent_with_tool, - "Please use the simple test tool", - run_config=test_run_config, - ) + assert transaction["transaction"] == "test_agent workflow" + assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" + + assert agent_span["description"] == "invoke_agent 
test_agent" + assert agent_span["origin"] == "auto.ai.openai_agents" + assert agent_span["data"]["gen_ai.agent.name"] == "test_agent" + assert agent_span["data"]["gen_ai.operation.name"] == "invoke_agent" + + agent_span_available_tool = json.loads( + agent_span["data"]["gen_ai.request.available_tools"] + )[0] + assert all(agent_span_available_tool[k] == v for k, v in available_tool.items()) + + assert agent_span["data"]["gen_ai.request.max_tokens"] == 100 + assert agent_span["data"]["gen_ai.request.model"] == "gpt-4" + assert agent_span["data"]["gen_ai.request.temperature"] == 0.7 + assert agent_span["data"]["gen_ai.request.top_p"] == 1.0 + assert agent_span["data"]["gen_ai.system"] == "openai" + + assert ai_client_span1["description"] == "chat gpt-4" + assert ai_client_span1["data"]["gen_ai.operation.name"] == "chat" + assert ai_client_span1["data"]["gen_ai.system"] == "openai" + assert ai_client_span1["data"]["gen_ai.agent.name"] == "test_agent" + + ai_client_span1_available_tool = json.loads( + ai_client_span1["data"]["gen_ai.request.available_tools"] + )[0] + assert all( + ai_client_span1_available_tool[k] == v for k, v in available_tool.items() + ) - (transaction,) = events - spans = transaction["spans"] - agent_span = next( - span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT - ) - ai_client_span1, ai_client_span2 = ( - span for span in spans if span["op"] == OP.GEN_AI_CHAT - ) - tool_span = next(span for span in spans if span["op"] == OP.GEN_AI_EXECUTE_TOOL) - - available_tool = { - "name": "simple_test_tool", - "description": "A simple tool", - "params_json_schema": { - "properties": {"message": {"title": "Message", "type": "string"}}, - "required": ["message"], - "title": "simple_test_tool_args", - "type": "object", - "additionalProperties": False, + assert ai_client_span1["data"]["gen_ai.request.max_tokens"] == 100 + assert ai_client_span1["data"]["gen_ai.request.messages"] == safe_serialize( + [ + { + "role": "user", + "content": [ + {"type": 
"text", "text": "Please use the simple test tool"} + ], }, - "on_invoke_tool": mock.ANY, - "strict_json_schema": True, - "is_enabled": True, - } - - if parse_version(OPENAI_AGENTS_VERSION) >= (0, 3, 3): - available_tool.update( - {"tool_input_guardrails": None, "tool_output_guardrails": None} - ) - - if parse_version(OPENAI_AGENTS_VERSION) >= ( - 0, - 8, - ): - available_tool["needs_approval"] = False - if parse_version(OPENAI_AGENTS_VERSION) >= ( - 0, - 9, - 0, - ): - available_tool.update( - { - "timeout_seconds": None, - "timeout_behavior": "error_as_result", - "timeout_error_function": None, - } - ) - - assert transaction["transaction"] == "test_agent workflow" - assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" - - assert agent_span["description"] == "invoke_agent test_agent" - assert agent_span["origin"] == "auto.ai.openai_agents" - assert agent_span["data"]["gen_ai.agent.name"] == "test_agent" - assert agent_span["data"]["gen_ai.operation.name"] == "invoke_agent" - - agent_span_available_tool = json.loads( - agent_span["data"]["gen_ai.request.available_tools"] - )[0] - assert all(agent_span_available_tool[k] == v for k, v in available_tool.items()) - - assert agent_span["data"]["gen_ai.request.max_tokens"] == 100 - assert agent_span["data"]["gen_ai.request.model"] == "gpt-4" - assert agent_span["data"]["gen_ai.request.temperature"] == 0.7 - assert agent_span["data"]["gen_ai.request.top_p"] == 1.0 - assert agent_span["data"]["gen_ai.system"] == "openai" - - assert ai_client_span1["description"] == "chat gpt-4" - assert ai_client_span1["data"]["gen_ai.operation.name"] == "chat" - assert ai_client_span1["data"]["gen_ai.system"] == "openai" - assert ai_client_span1["data"]["gen_ai.agent.name"] == "test_agent" - - ai_client_span1_available_tool = json.loads( - ai_client_span1["data"]["gen_ai.request.available_tools"] - )[0] - assert all( - ai_client_span1_available_tool[k] == v for k, v in available_tool.items() - ) - - assert 
ai_client_span1["data"]["gen_ai.request.max_tokens"] == 100 - assert ai_client_span1["data"]["gen_ai.request.messages"] == safe_serialize( - [ - { - "role": "user", - "content": [ - {"type": "text", "text": "Please use the simple test tool"} - ], - }, - ] - ) - assert ai_client_span1["data"]["gen_ai.request.model"] == "gpt-4" - assert ai_client_span1["data"]["gen_ai.request.temperature"] == 0.7 - assert ai_client_span1["data"]["gen_ai.request.top_p"] == 1.0 - assert ai_client_span1["data"]["gen_ai.usage.input_tokens"] == 10 - assert ai_client_span1["data"]["gen_ai.usage.input_tokens.cached"] == 0 - assert ai_client_span1["data"]["gen_ai.usage.output_tokens"] == 5 - assert ai_client_span1["data"]["gen_ai.usage.output_tokens.reasoning"] == 0 - assert ai_client_span1["data"]["gen_ai.usage.total_tokens"] == 15 - - tool_call = { - "arguments": '{"message": "hello"}', - "call_id": "call_123", - "name": "simple_test_tool", - "type": "function_call", - "id": "call_123", - "status": None, - } - - if OPENAI_VERSION >= (2, 25, 0): - tool_call["namespace"] = None - - assert json.loads(ai_client_span1["data"]["gen_ai.response.tool_calls"]) == [ - tool_call ] + ) + assert ai_client_span1["data"]["gen_ai.request.model"] == "gpt-4" + assert ai_client_span1["data"]["gen_ai.request.temperature"] == 0.7 + assert ai_client_span1["data"]["gen_ai.request.top_p"] == 1.0 + assert ai_client_span1["data"]["gen_ai.usage.input_tokens"] == 10 + assert ai_client_span1["data"]["gen_ai.usage.input_tokens.cached"] == 0 + assert ai_client_span1["data"]["gen_ai.usage.output_tokens"] == 5 + assert ai_client_span1["data"]["gen_ai.usage.output_tokens.reasoning"] == 0 + assert ai_client_span1["data"]["gen_ai.usage.total_tokens"] == 15 + + tool_call = { + "arguments": '{"message": "hello"}', + "call_id": "call_123", + "name": "simple_test_tool", + "type": "function_call", + "id": "call_123", + "status": None, + } + + if OPENAI_VERSION >= (2, 25, 0): + tool_call["namespace"] = None + + assert 
json.loads(ai_client_span1["data"]["gen_ai.response.tool_calls"]) == [ + tool_call + ] - assert tool_span["description"] == "execute_tool simple_test_tool" - assert tool_span["data"]["gen_ai.agent.name"] == "test_agent" - assert tool_span["data"]["gen_ai.operation.name"] == "execute_tool" - - tool_span_available_tool = json.loads( - tool_span["data"]["gen_ai.request.available_tools"] - )[0] - assert all(tool_span_available_tool[k] == v for k, v in available_tool.items()) - - assert tool_span["data"]["gen_ai.request.max_tokens"] == 100 - assert tool_span["data"]["gen_ai.request.model"] == "gpt-4" - assert tool_span["data"]["gen_ai.request.temperature"] == 0.7 - assert tool_span["data"]["gen_ai.request.top_p"] == 1.0 - assert tool_span["data"]["gen_ai.system"] == "openai" - assert tool_span["data"]["gen_ai.tool.description"] == "A simple tool" - assert tool_span["data"]["gen_ai.tool.input"] == '{"message": "hello"}' - assert tool_span["data"]["gen_ai.tool.name"] == "simple_test_tool" - assert tool_span["data"]["gen_ai.tool.output"] == "Tool executed with: hello" - assert ai_client_span2["description"] == "chat gpt-4" - assert ai_client_span2["data"]["gen_ai.agent.name"] == "test_agent" - assert ai_client_span2["data"]["gen_ai.operation.name"] == "chat" - - ai_client_span2_available_tool = json.loads( - ai_client_span2["data"]["gen_ai.request.available_tools"] - )[0] - assert all( - ai_client_span2_available_tool[k] == v for k, v in available_tool.items() - ) + assert tool_span["description"] == "execute_tool simple_test_tool" + assert tool_span["data"]["gen_ai.agent.name"] == "test_agent" + assert tool_span["data"]["gen_ai.operation.name"] == "execute_tool" + + tool_span_available_tool = json.loads( + tool_span["data"]["gen_ai.request.available_tools"] + )[0] + assert all(tool_span_available_tool[k] == v for k, v in available_tool.items()) + + assert tool_span["data"]["gen_ai.request.max_tokens"] == 100 + assert tool_span["data"]["gen_ai.request.model"] == "gpt-4" + 
assert tool_span["data"]["gen_ai.request.temperature"] == 0.7 + assert tool_span["data"]["gen_ai.request.top_p"] == 1.0 + assert tool_span["data"]["gen_ai.system"] == "openai" + assert tool_span["data"]["gen_ai.tool.description"] == "A simple tool" + assert tool_span["data"]["gen_ai.tool.input"] == '{"message": "hello"}' + assert tool_span["data"]["gen_ai.tool.name"] == "simple_test_tool" + assert tool_span["data"]["gen_ai.tool.output"] == "Tool executed with: hello" + assert ai_client_span2["description"] == "chat gpt-4" + assert ai_client_span2["data"]["gen_ai.agent.name"] == "test_agent" + assert ai_client_span2["data"]["gen_ai.operation.name"] == "chat" + + ai_client_span2_available_tool = json.loads( + ai_client_span2["data"]["gen_ai.request.available_tools"] + )[0] + assert all( + ai_client_span2_available_tool[k] == v for k, v in available_tool.items() + ) - assert ai_client_span2["data"]["gen_ai.request.max_tokens"] == 100 - assert ai_client_span2["data"]["gen_ai.request.messages"] == safe_serialize( - [ - { - "role": "tool", - "content": [ - { - "call_id": "call_123", - "output": "Tool executed with: hello", - "type": "function_call_output", - } - ], - }, - ] - ) - assert ai_client_span2["data"]["gen_ai.request.model"] == "gpt-4" - assert ai_client_span2["data"]["gen_ai.request.temperature"] == 0.7 - assert ai_client_span2["data"]["gen_ai.request.top_p"] == 1.0 - assert ( - ai_client_span2["data"]["gen_ai.response.text"] - == "Task completed using the tool" - ) - assert ai_client_span2["data"]["gen_ai.system"] == "openai" - assert ai_client_span2["data"]["gen_ai.usage.input_tokens.cached"] == 0 - assert ai_client_span2["data"]["gen_ai.usage.input_tokens"] == 15 - assert ai_client_span2["data"]["gen_ai.usage.output_tokens.reasoning"] == 0 - assert ai_client_span2["data"]["gen_ai.usage.output_tokens"] == 10 - assert ai_client_span2["data"]["gen_ai.usage.total_tokens"] == 25 + assert ai_client_span2["data"]["gen_ai.request.max_tokens"] == 100 + assert 
ai_client_span2["data"]["gen_ai.request.messages"] == safe_serialize( + [ + { + "role": "tool", + "content": [ + { + "call_id": "call_123", + "output": "Tool executed with: hello", + "type": "function_call_output", + } + ], + }, + ] + ) + assert ai_client_span2["data"]["gen_ai.request.model"] == "gpt-4" + assert ai_client_span2["data"]["gen_ai.request.temperature"] == 0.7 + assert ai_client_span2["data"]["gen_ai.request.top_p"] == 1.0 + assert ( + ai_client_span2["data"]["gen_ai.response.text"] + == "Task completed using the tool" + ) + assert ai_client_span2["data"]["gen_ai.system"] == "openai" + assert ai_client_span2["data"]["gen_ai.usage.input_tokens.cached"] == 0 + assert ai_client_span2["data"]["gen_ai.usage.input_tokens"] == 15 + assert ai_client_span2["data"]["gen_ai.usage.output_tokens.reasoning"] == 0 + assert ai_client_span2["data"]["gen_ai.usage.output_tokens"] == 10 + assert ai_client_span2["data"]["gen_ai.usage.total_tokens"] == 25 @pytest.mark.asyncio From 5e8c254da212e907d24571911fe54dc9555b074d Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 12 May 2026 15:59:06 +0200 Subject: [PATCH 57/84] add pytest mark asyncio --- tests/integrations/anthropic/test_anthropic.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/integrations/anthropic/test_anthropic.py b/tests/integrations/anthropic/test_anthropic.py index 02de047711..21e6c95100 100644 --- a/tests/integrations/anthropic/test_anthropic.py +++ b/tests/integrations/anthropic/test_anthropic.py @@ -3676,6 +3676,7 @@ def test_anthropic_message_truncation(sentry_init, capture_events): assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5 +@pytest.mark.asyncio async def test_anthropic_message_truncation_async(sentry_init, capture_events): """Test that large messages are truncated properly in Anthropic integration.""" sentry_init( From 449457b9239b9b15606af9389ca5069d2e9412f2 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 12 May 2026 
16:17:39 +0200 Subject: [PATCH 58/84] do not leak new option and use event_opt --- sentry_sdk/client.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sentry_sdk/client.py b/sentry_sdk/client.py index 01b4fc8fb6..aa3a210596 100644 --- a/sentry_sdk/client.py +++ b/sentry_sdk/client.py @@ -1121,6 +1121,8 @@ def capture_event( envelope.add_profile(profile.to_json(event_opt, self.options)) span_recorder_has_gen_ai_span = event.pop("_has_gen_ai_span", False) + if "_has_gen_ai_span" in event_opt: + del event_opt["_has_gen_ai_span"] if is_transaction and not span_recorder_has_gen_ai_span: envelope.add_transaction(event_opt) @@ -1135,7 +1137,7 @@ def capture_event( envelope.add_transaction(event_opt) converted_gen_ai_spans = [ - _serialized_v1_span_to_serialized_v2_span(span, event) + _serialized_v1_span_to_serialized_v2_span(span, event_opt) for span in gen_ai_spans if isinstance(span, dict) ] From 96f86e35d921014fcac81fefb3eaf9c15128137d Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 12 May 2026 16:19:36 +0200 Subject: [PATCH 59/84] send version field in json --- sentry_sdk/client.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sentry_sdk/client.py b/sentry_sdk/client.py index aa3a210596..e1da658efb 100644 --- a/sentry_sdk/client.py +++ b/sentry_sdk/client.py @@ -1150,7 +1150,10 @@ def capture_event( "item_count": len(converted_gen_ai_spans), }, payload=PayloadRef( - json={"items": converted_gen_ai_spans}, + json={ + "version": 2, + "items": converted_gen_ai_spans, + }, ), ) ) From aba2cf12b0df5c5c3e39267365dda4efd7c0f556 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 12 May 2026 16:31:45 +0200 Subject: [PATCH 60/84] fix op fallback --- sentry_sdk/client.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/sentry_sdk/client.py b/sentry_sdk/client.py index e1da658efb..053c73085c 100644 --- a/sentry_sdk/client.py +++ b/sentry_sdk/client.py @@ -174,11 +174,12 @@ 
def _serialized_v1_span_to_serialized_v2_span( res["span_id"] = span["span_id"] if "description" in span: - res["name"] = span["description"] - elif ( - "op" in span - ): # fallback based on observed downstream fallback for transactions - res["name"] = span["op"] + description = span["description"] + + if description is None and "op" in span: + res["name"] = span["op"] + + res["name"] = description if "start_timestamp" in span: start_timestamp = None From a48d7013654ecb015788e237e940cfd6aa1112c8 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 12 May 2026 16:35:27 +0200 Subject: [PATCH 61/84] fix logic --- sentry_sdk/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sentry_sdk/client.py b/sentry_sdk/client.py index 053c73085c..ca5cad4da4 100644 --- a/sentry_sdk/client.py +++ b/sentry_sdk/client.py @@ -177,7 +177,7 @@ def _serialized_v1_span_to_serialized_v2_span( description = span["description"] if description is None and "op" in span: - res["name"] = span["op"] + description = span["op"] res["name"] = description From dcce855e4f19e77174c97045f2976901bb682c00 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 12 May 2026 16:44:56 +0200 Subject: [PATCH 62/84] simplify logic --- sentry_sdk/client.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/sentry_sdk/client.py b/sentry_sdk/client.py index ca5cad4da4..d198b7f854 100644 --- a/sentry_sdk/client.py +++ b/sentry_sdk/client.py @@ -1083,6 +1083,8 @@ def capture_event( event_id = event.get("event_id") if event_id is None: event["event_id"] = event_id = uuid.uuid4().hex + + span_recorder_has_gen_ai_span = event.pop("_has_gen_ai_span", False) event_opt = self._prepare_event(event, hint, scope) if event_opt is None: return None @@ -1121,10 +1123,6 @@ def capture_event( if is_transaction and isinstance(profile, Profile): envelope.add_profile(profile.to_json(event_opt, self.options)) - span_recorder_has_gen_ai_span = 
event.pop("_has_gen_ai_span", False) - if "_has_gen_ai_span" in event_opt: - del event_opt["_has_gen_ai_span"] - if is_transaction and not span_recorder_has_gen_ai_span: envelope.add_transaction(event_opt) elif is_transaction: From 43920b54de1a373d02d83ba3f5fac382e54c4cab Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 12 May 2026 19:56:04 +0200 Subject: [PATCH 63/84] promote to top level option --- .../integrations/anthropic/test_anthropic.py | 104 +++++++++--------- .../google_genai/test_google_genai.py | 72 ++++++------ .../huggingface_hub/test_huggingface_hub.py | 16 +-- .../integrations/langchain/test_langchain.py | 54 ++++----- .../integrations/langgraph/test_langgraph.py | 36 +++--- tests/integrations/litellm/test_litellm.py | 54 ++++----- tests/integrations/openai/test_openai.py | 86 +++++++-------- .../openai_agents/test_openai_agents.py | 104 +++++++++--------- .../pydantic_ai/test_pydantic_ai.py | 80 +++++++------- 9 files changed, 303 insertions(+), 303 deletions(-) diff --git a/tests/integrations/anthropic/test_anthropic.py b/tests/integrations/anthropic/test_anthropic.py index 2e240b9c8f..f9f6241997 100644 --- a/tests/integrations/anthropic/test_anthropic.py +++ b/tests/integrations/anthropic/test_anthropic.py @@ -103,7 +103,7 @@ def test_nonstreaming_create_message( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = Anthropic(api_key="z") @@ -238,7 +238,7 @@ async def test_nonstreaming_create_message_async( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = AsyncAnthropic(api_key="z") @@ -409,7 +409,7 @@ def test_streaming_create_message( 
integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) messages = [ @@ -574,7 +574,7 @@ def test_streaming_create_message_close( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) messages = [ @@ -726,7 +726,7 @@ def test_streaming_create_message_api_error( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) messages = [ @@ -891,7 +891,7 @@ def test_stream_messages( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) messages = [ @@ -1055,7 +1055,7 @@ def test_stream_messages_close( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) messages = [ @@ -1215,7 +1215,7 @@ def test_stream_messages_api_error( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) messages = [ @@ -1386,7 +1386,7 @@ async def test_streaming_create_message_async( traces_sample_rate=1.0, default_integrations=False, send_default_pii=send_default_pii, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) messages = [ @@ -1555,7 +1555,7 @@ async def 
test_streaming_create_message_async_close( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) messages = [ @@ -1709,7 +1709,7 @@ async def test_streaming_create_message_async_api_error( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) messages = [ @@ -1879,7 +1879,7 @@ async def test_stream_message_async( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) messages = [ @@ -2044,7 +2044,7 @@ async def test_stream_messages_async_api_error( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) messages = [ @@ -2205,7 +2205,7 @@ async def test_stream_messages_async_close( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) messages = [ @@ -2417,7 +2417,7 @@ def test_streaming_create_message_with_input_json_delta( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) messages = [ @@ -2619,7 +2619,7 @@ def test_stream_messages_with_input_json_delta( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - 
_experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) messages = [ @@ -2827,7 +2827,7 @@ async def test_streaming_create_message_with_input_json_delta_async( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) messages = [ @@ -3037,7 +3037,7 @@ async def test_stream_message_with_input_json_delta_async( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) messages = [ @@ -3154,7 +3154,7 @@ def test_exception_message_create( sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = Anthropic(api_key="z") @@ -3202,7 +3202,7 @@ def test_span_status_error( sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) if stream_gen_ai_spans: items = capture_items("event", "span") @@ -3264,7 +3264,7 @@ async def test_span_status_error_async( sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) if stream_gen_ai_spans: items = capture_items("event", "span") @@ -3326,7 +3326,7 @@ async def test_exception_message_create_async( sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = AsyncAnthropic(api_key="z") @@ -3373,7 +3373,7 @@ def test_span_origin( 
sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = Anthropic(api_key="z") @@ -3423,7 +3423,7 @@ async def test_span_origin_async( sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = AsyncAnthropic(api_key="z") @@ -3561,7 +3561,7 @@ def test_anthropic_message_role_mapping( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = Anthropic(api_key="z") @@ -3637,7 +3637,7 @@ def test_anthropic_message_truncation( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = Anthropic(api_key="z") @@ -3729,7 +3729,7 @@ async def test_anthropic_message_truncation_async( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = AsyncAnthropic(api_key="z") @@ -3837,7 +3837,7 @@ def test_nonstreaming_create_message_with_system_prompt( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = Anthropic(api_key="z") @@ -3994,7 +3994,7 @@ async def test_nonstreaming_create_message_with_system_prompt_async( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - 
_experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = AsyncAnthropic(api_key="z") @@ -4191,7 +4191,7 @@ def test_streaming_create_message_with_system_prompt( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) messages = [ @@ -4388,7 +4388,7 @@ def test_stream_messages_with_system_prompt( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) messages = [ @@ -4578,7 +4578,7 @@ async def test_stream_message_with_system_prompt_async( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) messages = [ @@ -4773,7 +4773,7 @@ async def test_streaming_create_message_with_system_prompt_async( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) messages = [ @@ -4917,7 +4917,7 @@ def test_system_prompt_with_complex_structure( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = Anthropic(api_key="z") @@ -5215,7 +5215,7 @@ def test_message_with_base64_image( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + 
stream_gen_ai_spans=stream_gen_ai_spans, ) client = Anthropic(api_key="z") @@ -5289,7 +5289,7 @@ def test_message_with_url_image( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = Anthropic(api_key="z") @@ -5356,7 +5356,7 @@ def test_message_with_file_image( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = Anthropic(api_key="z") @@ -5424,7 +5424,7 @@ def test_message_with_base64_pdf( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = Anthropic(api_key="z") @@ -5492,7 +5492,7 @@ def test_message_with_url_pdf( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = Anthropic(api_key="z") @@ -5559,7 +5559,7 @@ def test_message_with_file_document( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = Anthropic(api_key="z") @@ -5627,7 +5627,7 @@ def test_message_with_mixed_content( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = Anthropic(api_key="z") @@ -5733,7 +5733,7 @@ def test_message_with_multiple_images_different_formats( 
integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = Anthropic(api_key="z") @@ -5831,7 +5831,7 @@ def test_binary_content_not_stored_when_pii_disabled( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=False, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = Anthropic(api_key="z") @@ -5891,7 +5891,7 @@ def test_binary_content_not_stored_when_prompts_disabled( integrations=[AnthropicIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = Anthropic(api_key="z") @@ -5950,7 +5950,7 @@ def test_cache_tokens_nonstreaming( sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = Anthropic(api_key="z") @@ -6028,7 +6028,7 @@ def test_input_tokens_include_cache_write_nonstreaming( sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = Anthropic(api_key="z") @@ -6110,7 +6110,7 @@ def test_input_tokens_include_cache_read_nonstreaming( sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = Anthropic(api_key="z") @@ -6216,7 +6216,7 @@ def test_input_tokens_include_cache_read_streaming( sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, 
) if stream_gen_ai_spans: @@ -6315,7 +6315,7 @@ def test_stream_messages_input_tokens_include_cache_read_streaming( sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) if stream_gen_ai_spans: @@ -6382,7 +6382,7 @@ def test_input_tokens_unchanged_without_caching( sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = Anthropic(api_key="z") @@ -6474,7 +6474,7 @@ def test_cache_tokens_streaming( sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) if stream_gen_ai_spans: @@ -6568,7 +6568,7 @@ def test_stream_messages_cache_tokens( sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) if stream_gen_ai_spans: diff --git a/tests/integrations/google_genai/test_google_genai.py b/tests/integrations/google_genai/test_google_genai.py index 8da5e7ca22..94bfea91fd 100644 --- a/tests/integrations/google_genai/test_google_genai.py +++ b/tests/integrations/google_genai/test_google_genai.py @@ -137,7 +137,7 @@ def test_nonstreaming_generate_content( integrations=[GoogleGenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) # Mock the HTTP response at the _api_client.request() level @@ -281,7 +281,7 @@ def test_generate_content_with_system_instruction( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": 
stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -364,7 +364,7 @@ def test_generate_content_with_tools( sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) # Create a mock tool function @@ -482,7 +482,7 @@ def test_tool_execution( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) # Create a mock tool function @@ -548,7 +548,7 @@ def test_error_handling( sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) if stream_gen_ai_spans: items = capture_items("event", "transaction") @@ -604,7 +604,7 @@ def test_streaming_generate_content( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) # Create streaming chunks - simulating a multi-chunk response @@ -797,7 +797,7 @@ def test_span_origin( sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -849,7 +849,7 @@ def test_response_without_usage_metadata( sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) # Response without usage metadata @@ -917,7 +917,7 @@ def test_multiple_candidates( 
integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) # Response with multiple candidates @@ -1021,7 +1021,7 @@ def test_all_configuration_parameters( sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -1103,7 +1103,7 @@ def test_empty_response( sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) # Minimal response with empty candidates array @@ -1158,7 +1158,7 @@ def test_response_with_different_id_fields( sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) # Response with response_id and model_version @@ -1248,7 +1248,7 @@ def test_contents_as_none( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -1299,7 +1299,7 @@ def test_tool_calls_extraction( sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) # Response with function calls @@ -1414,7 +1414,7 @@ def test_google_genai_message_truncation( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + 
stream_gen_ai_spans=stream_gen_ai_spans, ) large_content = ( @@ -1515,7 +1515,7 @@ def test_embed_content( integrations=[GoogleGenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) # Mock the HTTP response at the _api_client.request() level @@ -1637,7 +1637,7 @@ def test_embed_content_string_input( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) # Mock response with single embedding @@ -1717,7 +1717,7 @@ def test_embed_content_error_handling( sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) if stream_gen_ai_spans: items = capture_items("transaction", "event") @@ -1774,7 +1774,7 @@ def test_embed_content_without_statistics( sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) # Response without statistics (typical for older google-genai versions) @@ -1839,7 +1839,7 @@ def test_embed_content_span_origin( sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) mock_http_response = create_mock_http_response(EXAMPLE_EMBED_RESPONSE_JSON) @@ -1901,7 +1901,7 @@ async def test_async_embed_content( integrations=[GoogleGenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) # Mock the async HTTP response @@ 
-2024,7 +2024,7 @@ async def test_async_embed_content_string_input( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) # Mock response with single embedding @@ -2109,7 +2109,7 @@ async def test_async_embed_content_error_handling( sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) if stream_gen_ai_spans: @@ -2168,7 +2168,7 @@ async def test_async_embed_content_without_statistics( sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) # Response without statistics (typical for older google-genai versions) @@ -2238,7 +2238,7 @@ async def test_async_embed_content_span_origin( sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) mock_http_response = create_mock_http_response(EXAMPLE_EMBED_RESPONSE_JSON) @@ -2296,7 +2296,7 @@ def test_generate_content_with_content_object( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -2356,7 +2356,7 @@ def test_generate_content_with_dict_format( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -2414,7 +2414,7 @@ 
def test_generate_content_with_file_data( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -2487,7 +2487,7 @@ def test_generate_content_with_inline_data( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -2556,7 +2556,7 @@ def test_generate_content_with_function_response( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -2634,7 +2634,7 @@ def test_generate_content_with_mixed_string_and_content( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -2701,7 +2701,7 @@ def test_generate_content_with_part_object_directly( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -2763,7 +2763,7 @@ def test_generate_content_with_list_of_dicts( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": 
stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -2823,7 +2823,7 @@ def test_generate_content_with_dict_inline_data( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -2891,7 +2891,7 @@ def test_generate_content_without_parts_property_inline_data( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -2957,7 +2957,7 @@ def test_generate_content_without_parts_property_inline_data_and_binary_data_wit integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) diff --git a/tests/integrations/huggingface_hub/test_huggingface_hub.py b/tests/integrations/huggingface_hub/test_huggingface_hub.py index 2d94082e7b..dc9d7925ff 100644 --- a/tests/integrations/huggingface_hub/test_huggingface_hub.py +++ b/tests/integrations/huggingface_hub/test_huggingface_hub.py @@ -483,7 +483,7 @@ def test_text_generation( traces_sample_rate=1.0, send_default_pii=send_default_pii, integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = InferenceClient(model="test-model") @@ -618,7 +618,7 @@ def test_text_generation_streaming( traces_sample_rate=1.0, 
send_default_pii=send_default_pii, integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = InferenceClient(model="test-model") @@ -751,7 +751,7 @@ def test_chat_completion( traces_sample_rate=1.0, send_default_pii=send_default_pii, integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = get_hf_provider_inference_client() @@ -888,7 +888,7 @@ def test_chat_completion_streaming( traces_sample_rate=1.0, send_default_pii=send_default_pii, integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = get_hf_provider_inference_client() @@ -1024,7 +1024,7 @@ def test_chat_completion_api_error( ): sentry_init( traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = get_hf_provider_inference_client() @@ -1140,7 +1140,7 @@ def test_span_status_error( sentry_init( traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) if stream_gen_ai_spans: @@ -1210,7 +1210,7 @@ def test_chat_completion_with_tools( traces_sample_rate=1.0, send_default_pii=send_default_pii, integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = get_hf_provider_inference_client() @@ -1366,7 +1366,7 @@ def test_chat_completion_streaming_with_tools( traces_sample_rate=1.0, send_default_pii=send_default_pii, integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], - _experiments={"stream_gen_ai_spans": 
stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = get_hf_provider_inference_client() diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index 7adb2d13c5..e8d96d7eb1 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -273,7 +273,7 @@ def test_langchain_text_completion( ], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) model_response = get_model_response( @@ -395,7 +395,7 @@ def test_langchain_chat_with_run_name( ], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) request_headers = {} @@ -484,7 +484,7 @@ def test_langchain_tool_call_with_run_name( ], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) if stream_gen_ai_spans: items = capture_items("span") @@ -566,7 +566,7 @@ def test_langchain_create_agent( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) model_response = get_model_response( @@ -766,7 +766,7 @@ def test_tool_execution_span( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) responses = responses_tool_call_model_responses( @@ -1056,7 +1056,7 @@ def test_langchain_openai_tools_agent_no_prompts( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) prompt = ChatPromptTemplate.from_messages( @@ -1328,7 +1328,7 @@ def test_langchain_openai_tools_agent( ], 
traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) prompt = ChatPromptTemplate.from_messages( @@ -1605,7 +1605,7 @@ def test_langchain_openai_tools_agent_with_config( ], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) prompt = ChatPromptTemplate.from_messages( @@ -1726,7 +1726,7 @@ def test_langchain_openai_tools_agent_stream_no_prompts( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) prompt = ChatPromptTemplate.from_messages( @@ -2000,7 +2000,7 @@ def test_langchain_openai_tools_agent_stream( ], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) prompt = ChatPromptTemplate.from_messages( @@ -2293,7 +2293,7 @@ def test_langchain_openai_tools_agent_stream_with_config( ], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) prompt = ChatPromptTemplate.from_messages( @@ -2400,7 +2400,7 @@ def test_langchain_error( integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) prompt = ChatPromptTemplate.from_messages( @@ -2454,7 +2454,7 @@ def test_span_status_error( sentry_init( integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) if stream_gen_ai_spans: items = capture_items("event", "transaction", "span") @@ -2761,7 +2761,7 @@ def test_langchain_message_role_mapping( 
integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) prompt = ChatPromptTemplate.from_messages( @@ -2963,7 +2963,7 @@ def test_langchain_message_truncation( integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) callback = SentryLangchainCallback(max_span_map_size=100, include_prompts=True) @@ -3114,7 +3114,7 @@ def test_langchain_embeddings_sync( integrations=[LangchainIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) if stream_gen_ai_spans: items = capture_items("span") @@ -3262,7 +3262,7 @@ def test_langchain_embeddings_embed_query( integrations=[LangchainIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) if stream_gen_ai_spans: items = capture_items("span") @@ -3403,7 +3403,7 @@ async def test_langchain_embeddings_async( integrations=[LangchainIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) async def mock_aembed_documents(self, texts): @@ -3554,7 +3554,7 @@ async def test_langchain_embeddings_aembed_query( integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) async def mock_aembed_query(self, text): @@ -3671,7 +3671,7 
@@ def test_langchain_embeddings_no_model_name( sentry_init( integrations=[LangchainIntegration(include_prompts=False)], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) if stream_gen_ai_spans: items = capture_items("span") @@ -3772,7 +3772,7 @@ def test_langchain_embeddings_integration_disabled( sentry_init( traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) # Initialize without LangchainIntegration @@ -3844,7 +3844,7 @@ def test_langchain_embeddings_multiple_providers( integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) if stream_gen_ai_spans: items = capture_items("span") @@ -3999,7 +3999,7 @@ def test_langchain_embeddings_multiple_calls( integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) if stream_gen_ai_spans: items = capture_items("span") @@ -4125,7 +4125,7 @@ def test_langchain_embeddings_span_hierarchy( integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) if stream_gen_ai_spans: items = capture_items("transaction", "span") @@ -4235,7 +4235,7 @@ def test_langchain_embeddings_with_list_and_string_inputs( integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) if stream_gen_ai_spans: items = capture_items("span") @@ -4359,7 +4359,7 @@ def test_langchain_response_model_extraction( 
integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) callback = SentryLangchainCallback(max_span_map_size=100, include_prompts=True) @@ -4718,7 +4718,7 @@ def test_langchain_ai_system_detection( sentry_init( integrations=[LangchainIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) callback = SentryLangchainCallback(max_span_map_size=100, include_prompts=True) diff --git a/tests/integrations/langgraph/test_langgraph.py b/tests/integrations/langgraph/test_langgraph.py index 991c1f2269..6dd5c3cace 100644 --- a/tests/integrations/langgraph/test_langgraph.py +++ b/tests/integrations/langgraph/test_langgraph.py @@ -160,7 +160,7 @@ def test_state_graph_compile( integrations=[LanggraphIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) graph = MockStateGraph() @@ -265,7 +265,7 @@ def test_pregel_invoke( integrations=[LanggraphIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) test_state = { @@ -450,7 +450,7 @@ def test_pregel_ainvoke( integrations=[LanggraphIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) test_state = {"messages": [MockMessage("What's the weather like?", name="user")]} @@ -598,7 +598,7 @@ def test_pregel_invoke_error( integrations=[LanggraphIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - 
_experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) test_state = {"messages": [MockMessage("This will fail")]} @@ -660,7 +660,7 @@ def test_pregel_ainvoke_error( integrations=[LanggraphIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) test_state = {"messages": [MockMessage("This will fail async")]} @@ -720,7 +720,7 @@ def test_span_origin( sentry_init( integrations=[LanggraphIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) graph = MockStateGraph() @@ -773,7 +773,7 @@ def test_pregel_invoke_with_different_graph_names( integrations=[LanggraphIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) pregel = MockPregelInstance(graph_name) if graph_name else MockPregelInstance() @@ -854,7 +854,7 @@ def test_pregel_invoke_span_includes_usage_data( sentry_init( integrations=[LanggraphIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) test_state = { @@ -970,7 +970,7 @@ def test_pregel_ainvoke_span_includes_usage_data( sentry_init( integrations=[LanggraphIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) test_state = { @@ -1086,7 +1086,7 @@ def test_pregel_invoke_multiple_llm_calls_aggregate_usage( sentry_init( integrations=[LanggraphIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) test_state = { @@ -1201,7 +1201,7 @@ def test_pregel_ainvoke_multiple_llm_calls_aggregate_usage( sentry_init( 
integrations=[LanggraphIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) test_state = { @@ -1316,7 +1316,7 @@ def test_pregel_invoke_span_includes_response_model( sentry_init( integrations=[LanggraphIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) test_state = { @@ -1425,7 +1425,7 @@ def test_pregel_ainvoke_span_includes_response_model( sentry_init( integrations=[LanggraphIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) test_state = { @@ -1534,7 +1534,7 @@ def test_pregel_invoke_span_uses_last_response_model( sentry_init( integrations=[LanggraphIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) test_state = { @@ -1654,7 +1654,7 @@ def test_pregel_ainvoke_span_uses_last_response_model( sentry_init( integrations=[LanggraphIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) test_state = { @@ -1819,7 +1819,7 @@ def test_extraction_functions_complex_scenario( integrations=[LanggraphIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) pregel = MockPregelInstance("complex_graph") @@ -1930,7 +1930,7 @@ def test_langgraph_message_role_mapping( integrations=[LanggraphIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) # Mock a langgraph message with mixed roles @@ -2036,7 +2036,7 @@ def test_langgraph_message_truncation( 
integrations=[LanggraphIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) large_content = ( diff --git a/tests/integrations/litellm/test_litellm.py b/tests/integrations/litellm/test_litellm.py index 8ae8dca99e..a0120cd7b5 100644 --- a/tests/integrations/litellm/test_litellm.py +++ b/tests/integrations/litellm/test_litellm.py @@ -156,7 +156,7 @@ def test_nonstreaming_chat_completion( integrations=[LiteLLMIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) messages = [{"role": "user", "content": "Hello!"}] @@ -299,7 +299,7 @@ async def test_async_nonstreaming_chat_completion( integrations=[LiteLLMIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) messages = [{"role": "user", "content": "Hello!"}] @@ -445,7 +445,7 @@ def test_streaming_chat_completion( integrations=[LiteLLMIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) messages = [{"role": "user", "content": "Hello!"}] @@ -553,7 +553,7 @@ async def test_async_streaming_chat_completion( integrations=[LiteLLMIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) messages = [{"role": "user", "content": "Hello!"}] @@ -658,7 +658,7 @@ def test_embeddings_create( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, 
send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = OpenAI(api_key="test-key") @@ -777,7 +777,7 @@ async def test_async_embeddings_create( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = AsyncOpenAI(api_key="test-key") @@ -892,7 +892,7 @@ def test_embeddings_create_with_list_input( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = OpenAI(api_key="test-key") @@ -1003,7 +1003,7 @@ async def test_async_embeddings_create_with_list_input( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = AsyncOpenAI(api_key="test-key") @@ -1115,7 +1115,7 @@ def test_embeddings_no_pii( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=False, # PII disabled - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = OpenAI(api_key="test-key") @@ -1213,7 +1213,7 @@ async def test_async_embeddings_no_pii( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=False, # PII disabled - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = AsyncOpenAI(api_key="test-key") @@ -1308,7 +1308,7 @@ def test_exception_handling( sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) messages = 
[{"role": "user", "content": "Hello!"}] @@ -1374,7 +1374,7 @@ async def test_async_exception_handling( sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) messages = [{"role": "user", "content": "Hello!"}] @@ -1441,7 +1441,7 @@ def test_span_origin( sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) messages = [{"role": "user", "content": "Hello!"}] @@ -1523,7 +1523,7 @@ def test_multiple_providers( sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) messages = [{"role": "user", "content": "Hello!"}] @@ -1698,7 +1698,7 @@ async def test_async_multiple_providers( sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) messages = [{"role": "user", "content": "Hello!"}] @@ -1877,7 +1877,7 @@ def test_additional_parameters( sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) messages = [{"role": "user", "content": "Hello!"}] @@ -1986,7 +1986,7 @@ async def test_async_additional_parameters( sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) messages = [{"role": "user", "content": "Hello!"}] @@ -2096,7 +2096,7 @@ def test_no_integration( """Test that when integration is not enabled, callbacks don't break.""" sentry_init( traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + 
stream_gen_ai_spans=stream_gen_ai_spans, ) messages = [{"role": "user", "content": "Hello!"}] @@ -2181,7 +2181,7 @@ async def test_async_no_integration( """Test that when integration is not enabled, callbacks don't break.""" sentry_init( traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) messages = [{"role": "user", "content": "Hello!"}] @@ -2266,7 +2266,7 @@ def test_response_without_usage( sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) messages = [{"role": "user", "content": "Hello!"}] @@ -2353,7 +2353,7 @@ def test_litellm_message_truncation( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) large_content = ( @@ -2463,7 +2463,7 @@ def test_binary_content_encoding_image_url( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) messages = [ @@ -2583,7 +2583,7 @@ async def test_async_binary_content_encoding_image_url( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) messages = [ @@ -2705,7 +2705,7 @@ def test_binary_content_encoding_mixed_content( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) messages = [ @@ -2814,7 +2814,7 @@ async def test_async_binary_content_encoding_mixed_content( 
integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) messages = [ @@ -2927,7 +2927,7 @@ def test_binary_content_encoding_uri_type( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) messages = [ @@ -3044,7 +3044,7 @@ async def test_async_binary_content_encoding_uri_type( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) messages = [ diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index 0da39e842d..af0932eeb9 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -125,7 +125,7 @@ def test_nonstreaming_chat_completion_no_prompts( integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = OpenAI(api_key="z") @@ -288,7 +288,7 @@ def test_nonstreaming_chat_completion( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = OpenAI(api_key="z") @@ -453,7 +453,7 @@ async def test_nonstreaming_chat_completion_async_no_prompts( integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = 
AsyncOpenAI(api_key="z") @@ -611,7 +611,7 @@ async def test_nonstreaming_chat_completion_async( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = AsyncOpenAI(api_key="z") @@ -785,7 +785,7 @@ def test_streaming_chat_completion_no_prompts( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = OpenAI(api_key="z") @@ -964,7 +964,7 @@ def test_streaming_chat_completion_with_usage_in_stream( integrations=[OpenAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=False, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = OpenAI(api_key="z") @@ -1073,7 +1073,7 @@ def test_streaming_chat_completion_empty_content_preserves_token_usage( integrations=[OpenAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=False, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = OpenAI(api_key="z") @@ -1165,7 +1165,7 @@ async def test_streaming_chat_completion_empty_content_preserves_token_usage_asy integrations=[OpenAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=False, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = AsyncOpenAI(api_key="z") @@ -1259,7 +1259,7 @@ async def test_streaming_chat_completion_async_with_usage_in_stream( integrations=[OpenAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=False, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = AsyncOpenAI(api_key="z") @@ -1416,7 +1416,7 @@ def test_streaming_chat_completion( ], 
traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = OpenAI(api_key="z") @@ -1656,7 +1656,7 @@ async def test_streaming_chat_completion_async_no_prompts( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = AsyncOpenAI(api_key="z") @@ -1889,7 +1889,7 @@ async def test_streaming_chat_completion_async( ], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = AsyncOpenAI(api_key="z") @@ -2131,7 +2131,7 @@ def test_bad_chat_completion( sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) if stream_gen_ai_spans: @@ -2176,7 +2176,7 @@ def test_span_status_error( sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) if stream_gen_ai_spans: @@ -2229,7 +2229,7 @@ async def test_bad_chat_completion_async( sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = AsyncOpenAI(api_key="z") @@ -2279,7 +2279,7 @@ def test_embeddings_create_no_pii( integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = OpenAI(api_key="z") @@ -2397,7 +2397,7 @@ def test_embeddings_create( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - 
_experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = OpenAI(api_key="z") @@ -2539,7 +2539,7 @@ async def test_embeddings_create_async_no_pii( integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = AsyncOpenAI(api_key="z") @@ -2658,7 +2658,7 @@ async def test_embeddings_create_async( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = AsyncOpenAI(api_key="z") @@ -2797,7 +2797,7 @@ def test_embeddings_create_raises_error( integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = OpenAI(api_key="z") @@ -2842,7 +2842,7 @@ async def test_embeddings_create_raises_error_async( integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = AsyncOpenAI(api_key="z") @@ -2884,7 +2884,7 @@ def test_span_origin_nonstreaming_chat( sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = OpenAI(api_key="z") @@ -2941,7 +2941,7 @@ async def test_span_origin_nonstreaming_chat_async( sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = AsyncOpenAI(api_key="z") @@ 
-2996,7 +2996,7 @@ def test_span_origin_streaming_chat( sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = OpenAI(api_key="z") @@ -3082,7 +3082,7 @@ async def test_span_origin_streaming_chat_async( sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = AsyncOpenAI(api_key="z") @@ -3174,7 +3174,7 @@ def test_span_origin_embeddings( sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = OpenAI(api_key="z") @@ -3225,7 +3225,7 @@ async def test_span_origin_embeddings_async( sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = AsyncOpenAI(api_key="z") @@ -3642,7 +3642,7 @@ def test_ai_client_span_responses_api_no_pii( sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = OpenAI(api_key="z") @@ -3814,7 +3814,7 @@ def test_ai_client_span_responses_api( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = OpenAI(api_key="z") @@ -4237,7 +4237,7 @@ def test_responses_api_conversation_id( sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = OpenAI(api_key="z") @@ -4290,7 +4290,7 @@ def test_error_in_responses_api( 
integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = OpenAI(api_key="z") @@ -4425,7 +4425,7 @@ async def test_ai_client_span_responses_async_api( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = AsyncOpenAI(api_key="z") @@ -4911,7 +4911,7 @@ async def test_ai_client_span_streaming_responses_async_api( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = AsyncOpenAI(api_key="z") @@ -5196,7 +5196,7 @@ async def test_error_in_responses_async_api( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = AsyncOpenAI(api_key="z") @@ -5348,7 +5348,7 @@ def test_streaming_responses_api( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = OpenAI(api_key="z") @@ -5473,7 +5473,7 @@ async def test_streaming_responses_api_async( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = AsyncOpenAI(api_key="z") @@ -5590,7 +5590,7 @@ def test_empty_tools_in_chat_completion( sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = OpenAI(api_key="z") @@ -5668,7 
+5668,7 @@ def test_openai_message_role_mapping( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = OpenAI(api_key="z") @@ -5733,7 +5733,7 @@ def test_openai_message_truncation( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = OpenAI(api_key="z") @@ -5825,7 +5825,7 @@ def test_streaming_chat_completion_ttft( sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = OpenAI(api_key="z") @@ -5933,7 +5933,7 @@ async def test_streaming_chat_completion_ttft_async( sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = AsyncOpenAI(api_key="z") @@ -6042,7 +6042,7 @@ def test_streaming_responses_api_ttft( sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = OpenAI(api_key="z") @@ -6121,7 +6121,7 @@ async def test_streaming_responses_api_ttft_async( sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = AsyncOpenAI(api_key="z") diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py index 60f88cd7f4..9cdb3ea6f8 100644 --- a/tests/integrations/openai_agents/test_openai_agents.py +++ b/tests/integrations/openai_agents/test_openai_agents.py @@ -186,7 +186,7 @@ 
async def test_agent_invocation_span_no_pii( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=False, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) items = capture_items("span", "transaction") @@ -249,7 +249,7 @@ async def test_agent_invocation_span_no_pii( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=False, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) events = capture_events() @@ -404,7 +404,7 @@ async def test_agent_invocation_span( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) items = capture_items("span", "transaction") @@ -573,7 +573,7 @@ async def test_agent_invocation_span( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) events = capture_events() @@ -762,7 +762,7 @@ async def test_client_span_custom_model( sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) items = capture_items("span") @@ -790,7 +790,7 @@ async def test_client_span_custom_model( sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) events = capture_events() @@ -840,7 +840,7 @@ def test_agent_invocation_span_sync_no_pii( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=False, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) items = capture_items("span", 
"transaction") @@ -900,7 +900,7 @@ def test_agent_invocation_span_sync_no_pii( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=False, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) events = capture_events() @@ -1051,7 +1051,7 @@ def test_agent_invocation_span_sync( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) items = capture_items("span", "transaction") @@ -1200,7 +1200,7 @@ def test_agent_invocation_span_sync( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) events = capture_events() @@ -1449,7 +1449,7 @@ async def test_handoff_span( sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) items = capture_items("transaction", "span") @@ -1482,7 +1482,7 @@ async def test_handoff_span( sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) events = capture_events() @@ -1619,7 +1619,7 @@ async def test_max_turns_before_handoff_span( sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) items = capture_items("transaction", "span") @@ -1652,7 +1652,7 @@ async def test_max_turns_before_handoff_span( sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) events = capture_events() 
@@ -1756,7 +1756,7 @@ def simple_test_tool(message: str) -> str: integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) items = capture_items("transaction", "span") @@ -1970,7 +1970,7 @@ def simple_test_tool(message: str) -> str: integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) events = capture_events() @@ -2458,7 +2458,7 @@ def simple_test_tool(message: str) -> str: integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) items = capture_items("span", "transaction") @@ -2512,7 +2512,7 @@ def simple_test_tool(message: str) -> str: integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) events = capture_events() @@ -2566,7 +2566,7 @@ async def test_error_handling( LoggingIntegration(event_level=logging.CRITICAL), ], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) items = capture_items("event", "span", "transaction") @@ -2613,7 +2613,7 @@ async def test_error_handling( LoggingIntegration(event_level=logging.CRITICAL), ], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) events = capture_events() @@ -2689,7 +2689,7 @@ async def test_error_captures_input_data( ], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) items = capture_items("event", "span") @@ 
-2732,7 +2732,7 @@ async def test_error_captures_input_data( ], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) events = capture_events() @@ -2785,7 +2785,7 @@ async def test_span_status_error( LoggingIntegration(event_level=logging.CRITICAL), ], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) items = capture_items("event", "transaction", "span") @@ -2814,7 +2814,7 @@ async def test_span_status_error( LoggingIntegration(event_level=logging.CRITICAL), ], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) events = capture_events() @@ -2933,7 +2933,7 @@ async def test_mcp_tool_execution_spans( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) items = capture_items("span", "transaction") @@ -2979,7 +2979,7 @@ async def test_mcp_tool_execution_spans( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) events = capture_events() @@ -3115,7 +3115,7 @@ async def test_mcp_tool_execution_with_error( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) items = capture_items("span", "transaction") @@ -3154,7 +3154,7 @@ async def test_mcp_tool_execution_with_error( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) events = capture_events() @@ -3287,7 
+3287,7 @@ async def test_mcp_tool_execution_without_pii( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=False, # PII disabled - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) items = capture_items("span", "transaction") @@ -3325,7 +3325,7 @@ async def test_mcp_tool_execution_without_pii( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=False, # PII disabled - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) events = capture_events() @@ -3387,7 +3387,7 @@ async def test_multiple_agents_asyncio( sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) items = capture_items("span", "transaction") @@ -3417,7 +3417,7 @@ async def run(): sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) events = capture_events() @@ -3567,7 +3567,7 @@ def failing_tool(message: str) -> str: integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) items = capture_items("span", "transaction") @@ -3609,7 +3609,7 @@ def failing_tool(message: str) -> str: integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) events = capture_events() @@ -3712,7 +3712,7 @@ async def test_invoke_agent_span_includes_usage_data( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + 
stream_gen_ai_spans=stream_gen_ai_spans, ) items = capture_items("span", "transaction") @@ -3752,7 +3752,7 @@ async def test_invoke_agent_span_includes_usage_data( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) events = capture_events() @@ -3848,7 +3848,7 @@ async def test_ai_client_span_includes_response_model( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) items = capture_items("span", "transaction") @@ -3880,7 +3880,7 @@ async def test_ai_client_span_includes_response_model( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) events = capture_events() @@ -3971,7 +3971,7 @@ async def test_ai_client_span_response_model_with_chat_completions( sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) items = capture_items("span", "transaction") @@ -4002,7 +4002,7 @@ async def test_ai_client_span_response_model_with_chat_completions( sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) events = capture_events() @@ -4129,7 +4129,7 @@ def calculator(a: int, b: int) -> int: integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) items = capture_items("span", "transaction") @@ -4167,7 +4167,7 @@ def calculator(a: int, b: int) -> int: 
integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) events = capture_events() @@ -4261,7 +4261,7 @@ async def test_invoke_agent_span_includes_response_model( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) items = capture_items("span", "transaction") @@ -4306,7 +4306,7 @@ async def test_invoke_agent_span_includes_response_model( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) events = capture_events() @@ -4441,7 +4441,7 @@ def calculator(a: int, b: int) -> int: integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) items = capture_items("span", "transaction") @@ -4485,7 +4485,7 @@ def calculator(a: int, b: int) -> int: integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) events = capture_events() @@ -4795,7 +4795,7 @@ async def test_conversation_id_on_all_spans( sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) items = capture_items("span", "transaction") @@ -4843,7 +4843,7 @@ async def test_conversation_id_on_all_spans( sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) events = capture_events() 
@@ -4984,7 +4984,7 @@ def simple_tool(message: str) -> str: sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) items = capture_items("span", "transaction") @@ -5026,7 +5026,7 @@ def simple_tool(message: str) -> str: sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) events = capture_events() @@ -5095,7 +5095,7 @@ async def test_no_conversation_id_when_not_provided( sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) items = capture_items("span", "transaction") @@ -5134,7 +5134,7 @@ async def test_no_conversation_id_when_not_provided( sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) events = capture_events() diff --git a/tests/integrations/pydantic_ai/test_pydantic_ai.py b/tests/integrations/pydantic_ai/test_pydantic_ai.py index b2dfe76988..5cea5063ff 100644 --- a/tests/integrations/pydantic_ai/test_pydantic_ai.py +++ b/tests/integrations/pydantic_ai/test_pydantic_ai.py @@ -68,7 +68,7 @@ async def test_agent_run_async( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) test_agent = get_test_agent() @@ -150,7 +150,7 @@ async def test_agent_run_async_model_error( sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) def failing_model(messages, info): @@ -205,7 +205,7 @@ async 
def test_agent_run_async_usage_data( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) test_agent = get_test_agent() @@ -269,7 +269,7 @@ def test_agent_run_sync( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) test_agent = get_test_agent() @@ -336,7 +336,7 @@ def test_agent_run_sync_model_error( sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) def failing_model(messages, info): @@ -391,7 +391,7 @@ async def test_agent_run_stream( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) test_agent = get_test_agent() @@ -478,7 +478,7 @@ async def test_agent_run_stream_events( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) # Consume all events @@ -545,7 +545,7 @@ async def test_agent_with_tools( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) test_agent = get_test_agent() @@ -651,7 +651,7 @@ async def test_agent_with_tool_model_retry( ], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) retries = 0 @@ -790,7 +790,7 @@ async def test_agent_with_tool_validation_error( ], traces_sample_rate=1.0, send_default_pii=True, - 
_experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) test_agent = get_test_agent() @@ -910,7 +910,7 @@ async def test_agent_with_tools_streaming( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) test_agent = get_test_agent() @@ -994,7 +994,7 @@ async def test_model_settings( sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) test_agent_with_settings = get_test_agent_with_settings() @@ -1068,7 +1068,7 @@ async def test_system_prompt_attribute( integrations=[PydanticAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) if stream_gen_ai_spans: @@ -1147,7 +1147,7 @@ async def test_error_handling( sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) if stream_gen_ai_spans: @@ -1190,7 +1190,7 @@ async def test_without_pii( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=False, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) if stream_gen_ai_spans: @@ -1244,7 +1244,7 @@ async def test_without_pii_tools( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=False, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) test_agent = get_test_agent() @@ -1304,7 +1304,7 @@ async def test_multiple_agents_concurrent( sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - 
_experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) test_agent = get_test_agent() @@ -1363,7 +1363,7 @@ async def test_message_history( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) # Second message with history @@ -1444,7 +1444,7 @@ async def test_gen_ai_system( sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) test_agent = get_test_agent() @@ -1500,7 +1500,7 @@ async def test_include_prompts_false( integrations=[PydanticAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=True, # Even with PII enabled, prompts should not be captured - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) test_agent = get_test_agent() @@ -1554,7 +1554,7 @@ async def test_include_prompts_true( integrations=[PydanticAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) test_agent = get_test_agent() @@ -1608,7 +1608,7 @@ async def test_include_prompts_false_with_tools( integrations=[PydanticAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) test_agent = get_test_agent() @@ -1669,7 +1669,7 @@ async def test_include_prompts_requires_pii( integrations=[PydanticAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=False, # PII disabled - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) test_agent = get_test_agent() @@ -1784,7 +1784,7 
@@ async def mock_map_tool_result_part(part): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) if stream_gen_ai_spans: @@ -2080,7 +2080,7 @@ async def test_invoke_agent_with_list_user_prompt( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) if stream_gen_ai_spans: @@ -2146,7 +2146,7 @@ async def test_invoke_agent_with_instructions( integrations=[PydanticAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) if stream_gen_ai_spans: @@ -2307,7 +2307,7 @@ async def test_usage_data_partial( sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) if stream_gen_ai_spans: @@ -2358,7 +2358,7 @@ async def test_agent_data_from_scope( sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) if stream_gen_ai_spans: @@ -2397,7 +2397,7 @@ async def test_available_tools_without_description( sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) test_agent = get_test_agent() @@ -2454,7 +2454,7 @@ async def test_output_with_tool_calls( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) test_agent = get_test_agent() @@ -2525,7 
+2525,7 @@ async def test_message_formatting_with_different_parts( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) # Create message history with different part types @@ -2651,7 +2651,7 @@ async def test_agent_without_name( sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) if stream_gen_ai_spans: @@ -2849,7 +2849,7 @@ def test_tool(x: int) -> int: integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) if stream_gen_ai_spans: @@ -3775,7 +3775,7 @@ async def test_binary_content_encoding_image( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) if stream_gen_ai_spans: @@ -3830,7 +3830,7 @@ async def test_binary_content_encoding_mixed_content( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) if stream_gen_ai_spans: @@ -3901,7 +3901,7 @@ async def test_binary_content_in_agent_run( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) binary_content = BinaryContent( @@ -3957,7 +3957,7 @@ async def test_set_usage_data_with_cache_tokens( sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) if stream_gen_ai_spans: @@ 
-4053,7 +4053,7 @@ def test_image_url_base64_content_in_span( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) found_image = False @@ -4157,7 +4157,7 @@ async def test_invoke_agent_image_url( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) agent = Agent("test", name="test_image_url_agent") @@ -4233,7 +4233,7 @@ def multiply_numbers(a: int, b: int) -> int: integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) if stream_gen_ai_spans: From 9b4ad4bd5bc4746447d52df97a2d88b0e34f36c3 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 12 May 2026 20:04:12 +0200 Subject: [PATCH 64/84] add parameter --- sentry_sdk/consts.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sentry_sdk/consts.py b/sentry_sdk/consts.py index d2b4cd89af..0a58292d6d 100644 --- a/sentry_sdk/consts.py +++ b/sentry_sdk/consts.py @@ -1218,6 +1218,7 @@ def __init__( before_send_metric: "Optional[Callable[[Metric, Hint], Optional[Metric]]]" = None, org_id: "Optional[str]" = None, strict_trace_continuation: bool = False, + stream_gen_ai_spans: bool = False, ) -> None: """Initialize the Sentry SDK with the given parameters. All parameters described here can be used in a call to `sentry_sdk.init()`. @@ -1633,6 +1634,9 @@ def __init__( but you can provide it explicitly for self-hosted and Relay setups. This value is used for trace propagation and for features like `strict_trace_continuation`. + :param stream_gen_ai_spans: When set, generative AI spans are sent in a new transport format to + reduce downstream data loss. 
+ :param _experiments: """ pass From 5889ad968a404e82e40a58b5d3e3147febe161da Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 12 May 2026 20:06:55 +0200 Subject: [PATCH 65/84] update tracing --- sentry_sdk/tracing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sentry_sdk/tracing.py b/sentry_sdk/tracing.py index 96029f1f58..6cf2527fe3 100644 --- a/sentry_sdk/tracing.py +++ b/sentry_sdk/tracing.py @@ -1042,7 +1042,7 @@ def finish( finished_spans = [] has_gen_ai_span = False - if client.options["_experiments"].get("stream_gen_ai_spans", False): + if client.options.get("stream_gen_ai_spans", False): for span in self._span_recorder.spans: if span.timestamp is None: continue From c948c14af1ef232d7653444d74683c46a85bbff5 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 12 May 2026 20:12:15 +0200 Subject: [PATCH 66/84] update to non-experimental option --- sentry_sdk/ai/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sentry_sdk/ai/utils.py b/sentry_sdk/ai/utils.py index 4bd65ced76..fb9edcd335 100644 --- a/sentry_sdk/ai/utils.py +++ b/sentry_sdk/ai/utils.py @@ -742,7 +742,7 @@ def truncate_and_annotate_messages( max_single_message_chars: int = MAX_SINGLE_MESSAGE_CONTENT_CHARS, ) -> "Optional[List[Dict[str, Any]]]": client = sentry_sdk.get_client() - if client.options["_experiments"].get("stream_gen_ai_spans", False): + if client.options.get("stream_gen_ai_spans", False): return messages if not messages: @@ -766,7 +766,7 @@ def truncate_and_annotate_embedding_inputs( max_bytes: int = MAX_GEN_AI_MESSAGE_BYTES, ) -> "Optional[List[Dict[str, Any]]]": client = sentry_sdk.get_client() - if client.options["_experiments"].get("stream_gen_ai_spans", False): + if client.options.get("stream_gen_ai_spans", False): return messages if not messages: From cf04adba9c7a6d960d1abb16851d316160a40665 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 12 May 2026 20:14:02 +0200 Subject: [PATCH 
67/84] update more tests --- tests/tracing/test_decorator.py | 8 ++++---- tests/tracing/test_misc.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/tracing/test_decorator.py b/tests/tracing/test_decorator.py index 8d7c97fdbf..4e0c6cc1a9 100644 --- a/tests/tracing/test_decorator.py +++ b/tests/tracing/test_decorator.py @@ -169,7 +169,7 @@ def my_agent(): if stream_gen_ai_spans: sentry_init( traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) items = capture_items("span") @@ -251,7 +251,7 @@ def my_agent(): else: sentry_init( traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) events = capture_events() @@ -361,7 +361,7 @@ def my_agent(): sentry_init( traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) if stream_gen_ai_spans: @@ -537,7 +537,7 @@ def my_agent(*args, **kwargs): sentry_init( traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) if stream_gen_ai_spans: diff --git a/tests/tracing/test_misc.py b/tests/tracing/test_misc.py index 1119f42461..1066bcb709 100644 --- a/tests/tracing/test_misc.py +++ b/tests/tracing/test_misc.py @@ -653,7 +653,7 @@ def test_conversation_id_propagates_to_span_with_gen_ai_op( """Span with gen_ai.* op should get conversation_id.""" sentry_init( traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) if stream_gen_ai_spans: From 398559b8d8b87ed712b52651dbbbc5bdc4ad94b9 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 12 May 2026 20:32:39 +0200 Subject: [PATCH 68/84] restore legitimate test --- .../openai_agents/test_openai_agents.py | 589 ++++++++++++------ 1 file changed, 414 
insertions(+), 175 deletions(-) diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py index aa2dcab76e..4752ac0376 100644 --- a/tests/integrations/openai_agents/test_openai_agents.py +++ b/tests/integrations/openai_agents/test_openai_agents.py @@ -1679,13 +1679,16 @@ async def test_max_turns_before_handoff_span( assert handoff_span["data"]["gen_ai.operation.name"] == "handoff" +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_tool_execution_span( sentry_init, capture_events, + capture_items, test_agent, get_model_response, responses_tool_call_model_responses, + stream_gen_ai_spans, ): """ Test tool execution span creation. @@ -1743,195 +1746,431 @@ def simple_test_tool(message: str) -> str: serialize_pydantic=True, ) - with patch.object( - agent_with_tool.model._client._client, - "send", - side_effect=[tool_response, final_response], - ) as _: - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - send_default_pii=True, + if stream_gen_ai_spans: + with patch.object( + agent_with_tool.model._client._client, + "send", + side_effect=[tool_response, final_response], + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=True, + stream_gen_ai_spans=stream_gen_ai_spans, + ) + + items = capture_items("transaction", "span") + + await agents.Runner.run( + agent_with_tool, + "Please use the simple test tool", + run_config=test_run_config, + ) + + (transaction,) = (item.payload for item in items if item.type == "transaction") + assert transaction["transaction"] == "test_agent workflow" + assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" + + spans = [item.payload for item in items if item.type == "span"] + agent_span = next( + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT + ) + ai_client_span1, 
ai_client_span2 = ( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) + tool_span = next( + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_EXECUTE_TOOL ) - events = capture_events() + available_tool = { + "name": "simple_test_tool", + "description": "A simple tool", + "params_json_schema": { + "properties": {"message": {"title": "Message", "type": "string"}}, + "required": ["message"], + "title": "simple_test_tool_args", + "type": "object", + "additionalProperties": False, + }, + "on_invoke_tool": mock.ANY, + "strict_json_schema": True, + "is_enabled": True, + } + + if parse_version(OPENAI_AGENTS_VERSION) >= (0, 3, 3): + available_tool.update( + {"tool_input_guardrails": None, "tool_output_guardrails": None} + ) + + if parse_version(OPENAI_AGENTS_VERSION) >= ( + 0, + 8, + ): + available_tool["needs_approval"] = False + if parse_version(OPENAI_AGENTS_VERSION) >= ( + 0, + 9, + 0, + ): + available_tool.update( + { + "timeout_seconds": None, + "timeout_behavior": "error_as_result", + "timeout_error_function": None, + } + ) - await agents.Runner.run( - agent_with_tool, - "Please use the simple test tool", - run_config=test_run_config, + assert agent_span["name"] == "invoke_agent test_agent" + assert agent_span["attributes"]["sentry.origin"] == "auto.ai.openai_agents" + assert agent_span["attributes"]["gen_ai.agent.name"] == "test_agent" + assert agent_span["attributes"]["gen_ai.operation.name"] == "invoke_agent" + + agent_span_available_tool = json.loads( + agent_span["attributes"]["gen_ai.request.available_tools"] + )[0] + + assert all(agent_span_available_tool[k] == v for k, v in available_tool.items()) + + assert agent_span["attributes"]["gen_ai.request.max_tokens"] == 100 + assert agent_span["attributes"]["gen_ai.request.model"] == "gpt-4" + assert agent_span["attributes"]["gen_ai.request.temperature"] == 0.7 + assert agent_span["attributes"]["gen_ai.request.top_p"] == 1.0 + assert 
agent_span["attributes"]["gen_ai.system"] == "openai" + + assert ai_client_span1["name"] == "chat gpt-4" + assert ai_client_span1["attributes"]["gen_ai.operation.name"] == "chat" + assert ai_client_span1["attributes"]["gen_ai.system"] == "openai" + assert ai_client_span1["attributes"]["gen_ai.agent.name"] == "test_agent" + + ai_client_span1_available_tool = json.loads( + ai_client_span1["attributes"]["gen_ai.request.available_tools"] + )[0] + + assert all( + ai_client_span1_available_tool[k] == v for k, v in available_tool.items() ) - (transaction,) = events - spans = transaction["spans"] - agent_span = next(span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT) - ai_client_span1, ai_client_span2 = ( - span for span in spans if span["op"] == OP.GEN_AI_CHAT - ) - tool_span = next(span for span in spans if span["op"] == OP.GEN_AI_EXECUTE_TOOL) - - available_tool = { - "name": "simple_test_tool", - "description": "A simple tool", - "params_json_schema": { - "properties": {"message": {"title": "Message", "type": "string"}}, - "required": ["message"], - "title": "simple_test_tool_args", - "type": "object", - "additionalProperties": False, - }, - "on_invoke_tool": mock.ANY, - "strict_json_schema": True, - "is_enabled": True, - } - - if parse_version(OPENAI_AGENTS_VERSION) >= (0, 3, 3): - available_tool.update( - {"tool_input_guardrails": None, "tool_output_guardrails": None} + assert ai_client_span1["attributes"]["gen_ai.request.max_tokens"] == 100 + assert ai_client_span1["attributes"][ + "gen_ai.request.messages" + ] == safe_serialize( + [ + { + "role": "user", + "content": [ + {"type": "text", "text": "Please use the simple test tool"} + ], + }, + ] + ) + assert ai_client_span1["attributes"]["gen_ai.request.model"] == "gpt-4" + assert ai_client_span1["attributes"]["gen_ai.request.temperature"] == 0.7 + assert ai_client_span1["attributes"]["gen_ai.request.top_p"] == 1.0 + assert ai_client_span1["attributes"]["gen_ai.usage.input_tokens"] == 10 + assert 
ai_client_span1["attributes"]["gen_ai.usage.input_tokens.cached"] == 0 + assert ai_client_span1["attributes"]["gen_ai.usage.output_tokens"] == 5 + assert ( + ai_client_span1["attributes"]["gen_ai.usage.output_tokens.reasoning"] == 0 + ) + assert ai_client_span1["attributes"]["gen_ai.usage.total_tokens"] == 15 + + tool_call = { + "arguments": '{"message": "hello"}', + "call_id": "call_123", + "name": "simple_test_tool", + "type": "function_call", + "id": "call_123", + "status": None, + } + + if OPENAI_VERSION >= (2, 25, 0): + tool_call["namespace"] = None + + assert json.loads( + ai_client_span1["attributes"]["gen_ai.response.tool_calls"] + ) == [tool_call] + + assert tool_span["name"] == "execute_tool simple_test_tool" + assert tool_span["attributes"]["gen_ai.agent.name"] == "test_agent" + assert tool_span["attributes"]["gen_ai.operation.name"] == "execute_tool" + + tool_span_available_tool = json.loads( + tool_span["attributes"]["gen_ai.request.available_tools"] + )[0] + + assert all(tool_span_available_tool[k] == v for k, v in available_tool.items()) + + assert tool_span["attributes"]["gen_ai.request.max_tokens"] == 100 + assert tool_span["attributes"]["gen_ai.request.model"] == "gpt-4" + assert tool_span["attributes"]["gen_ai.request.temperature"] == 0.7 + assert tool_span["attributes"]["gen_ai.request.top_p"] == 1.0 + assert tool_span["attributes"]["gen_ai.system"] == "openai" + assert tool_span["attributes"]["gen_ai.tool.description"] == "A simple tool" + assert tool_span["attributes"]["gen_ai.tool.input"] == '{"message": "hello"}' + assert tool_span["attributes"]["gen_ai.tool.name"] == "simple_test_tool" + assert ( + tool_span["attributes"]["gen_ai.tool.output"] == "Tool executed with: hello" ) + assert ai_client_span2["name"] == "chat gpt-4" + assert ai_client_span2["attributes"]["gen_ai.agent.name"] == "test_agent" + assert ai_client_span2["attributes"]["gen_ai.operation.name"] == "chat" - if parse_version(OPENAI_AGENTS_VERSION) >= ( - 0, - 8, - ): - 
available_tool["needs_approval"] = False - if parse_version(OPENAI_AGENTS_VERSION) >= ( - 0, - 9, - 0, - ): - available_tool.update( - { - "timeout_seconds": None, - "timeout_behavior": "error_as_result", - "timeout_error_function": None, - } + ai_client_span2_available_tool = json.loads( + ai_client_span2["attributes"]["gen_ai.request.available_tools"] + )[0] + + assert all( + ai_client_span2_available_tool[k] == v for k, v in available_tool.items() ) - assert transaction["transaction"] == "test_agent workflow" - assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" - - assert agent_span["description"] == "invoke_agent test_agent" - assert agent_span["origin"] == "auto.ai.openai_agents" - assert agent_span["data"]["gen_ai.agent.name"] == "test_agent" - assert agent_span["data"]["gen_ai.operation.name"] == "invoke_agent" - - agent_span_available_tool = json.loads( - agent_span["data"]["gen_ai.request.available_tools"] - )[0] - assert all(agent_span_available_tool[k] == v for k, v in available_tool.items()) - - assert agent_span["data"]["gen_ai.request.max_tokens"] == 100 - assert agent_span["data"]["gen_ai.request.model"] == "gpt-4" - assert agent_span["data"]["gen_ai.request.temperature"] == 0.7 - assert agent_span["data"]["gen_ai.request.top_p"] == 1.0 - assert agent_span["data"]["gen_ai.system"] == "openai" - - assert ai_client_span1["description"] == "chat gpt-4" - assert ai_client_span1["data"]["gen_ai.operation.name"] == "chat" - assert ai_client_span1["data"]["gen_ai.system"] == "openai" - assert ai_client_span1["data"]["gen_ai.agent.name"] == "test_agent" - - ai_client_span1_available_tool = json.loads( - ai_client_span1["data"]["gen_ai.request.available_tools"] - )[0] - assert all( - ai_client_span1_available_tool[k] == v for k, v in available_tool.items() - ) + assert ai_client_span2["attributes"]["gen_ai.request.max_tokens"] == 100 + assert ai_client_span2["attributes"][ + "gen_ai.request.messages" + ] == safe_serialize( + [ + { + 
"role": "user", + "content": [ + {"type": "text", "text": "Please use the simple test tool"} + ], + }, + { + "role": "assistant", + "content": [ + { + "arguments": '{"message": "hello"}', + "call_id": "call_123", + "name": "simple_test_tool", + "type": "function_call", + "id": "call_123", + } + ], + }, + { + "role": "tool", + "content": [ + { + "call_id": "call_123", + "output": "Tool executed with: hello", + "type": "function_call_output", + } + ], + }, + ] + ) + assert ai_client_span2["attributes"]["gen_ai.request.model"] == "gpt-4" + assert ai_client_span2["attributes"]["gen_ai.request.temperature"] == 0.7 + assert ai_client_span2["attributes"]["gen_ai.request.top_p"] == 1.0 + assert ( + ai_client_span2["attributes"]["gen_ai.response.text"] + == "Task completed using the tool" + ) + assert ai_client_span2["attributes"]["gen_ai.system"] == "openai" + assert ai_client_span2["attributes"]["gen_ai.usage.input_tokens.cached"] == 0 + assert ai_client_span2["attributes"]["gen_ai.usage.input_tokens"] == 15 + assert ( + ai_client_span2["attributes"]["gen_ai.usage.output_tokens.reasoning"] == 0 + ) + assert ai_client_span2["attributes"]["gen_ai.usage.output_tokens"] == 10 + assert ai_client_span2["attributes"]["gen_ai.usage.total_tokens"] == 25 - assert ai_client_span1["data"]["gen_ai.request.max_tokens"] == 100 - assert ai_client_span1["data"]["gen_ai.request.messages"] == safe_serialize( - [ - { - "role": "user", - "content": [ - {"type": "text", "text": "Please use the simple test tool"} - ], - }, - ] - ) - assert ai_client_span1["data"]["gen_ai.request.model"] == "gpt-4" - assert ai_client_span1["data"]["gen_ai.request.temperature"] == 0.7 - assert ai_client_span1["data"]["gen_ai.request.top_p"] == 1.0 - assert ai_client_span1["data"]["gen_ai.usage.input_tokens"] == 10 - assert ai_client_span1["data"]["gen_ai.usage.input_tokens.cached"] == 0 - assert ai_client_span1["data"]["gen_ai.usage.output_tokens"] == 5 - assert 
ai_client_span1["data"]["gen_ai.usage.output_tokens.reasoning"] == 0 - assert ai_client_span1["data"]["gen_ai.usage.total_tokens"] == 15 - - tool_call = { - "arguments": '{"message": "hello"}', - "call_id": "call_123", - "name": "simple_test_tool", - "type": "function_call", - "id": "call_123", - "status": None, - } - - if OPENAI_VERSION >= (2, 25, 0): - tool_call["namespace"] = None - - assert json.loads(ai_client_span1["data"]["gen_ai.response.tool_calls"]) == [ - tool_call - ] + else: + with patch.object( + agent_with_tool.model._client._client, + "send", + side_effect=[tool_response, final_response], + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=True, + stream_gen_ai_spans=stream_gen_ai_spans, + ) - assert tool_span["description"] == "execute_tool simple_test_tool" - assert tool_span["data"]["gen_ai.agent.name"] == "test_agent" - assert tool_span["data"]["gen_ai.operation.name"] == "execute_tool" - - tool_span_available_tool = json.loads( - tool_span["data"]["gen_ai.request.available_tools"] - )[0] - assert all(tool_span_available_tool[k] == v for k, v in available_tool.items()) - - assert tool_span["data"]["gen_ai.request.max_tokens"] == 100 - assert tool_span["data"]["gen_ai.request.model"] == "gpt-4" - assert tool_span["data"]["gen_ai.request.temperature"] == 0.7 - assert tool_span["data"]["gen_ai.request.top_p"] == 1.0 - assert tool_span["data"]["gen_ai.system"] == "openai" - assert tool_span["data"]["gen_ai.tool.description"] == "A simple tool" - assert tool_span["data"]["gen_ai.tool.input"] == '{"message": "hello"}' - assert tool_span["data"]["gen_ai.tool.name"] == "simple_test_tool" - assert tool_span["data"]["gen_ai.tool.output"] == "Tool executed with: hello" - assert ai_client_span2["description"] == "chat gpt-4" - assert ai_client_span2["data"]["gen_ai.agent.name"] == "test_agent" - assert ai_client_span2["data"]["gen_ai.operation.name"] == "chat" - - ai_client_span2_available_tool = 
json.loads( - ai_client_span2["data"]["gen_ai.request.available_tools"] - )[0] - assert all( - ai_client_span2_available_tool[k] == v for k, v in available_tool.items() - ) + events = capture_events() - assert ai_client_span2["data"]["gen_ai.request.max_tokens"] == 100 - assert ai_client_span2["data"]["gen_ai.request.messages"] == safe_serialize( - [ - { - "role": "tool", - "content": [ - { - "call_id": "call_123", - "output": "Tool executed with: hello", - "type": "function_call_output", - } - ], + await agents.Runner.run( + agent_with_tool, + "Please use the simple test tool", + run_config=test_run_config, + ) + + (transaction,) = events + spans = transaction["spans"] + agent_span = next( + span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT + ) + ai_client_span1, ai_client_span2 = ( + span for span in spans if span["op"] == OP.GEN_AI_CHAT + ) + tool_span = next(span for span in spans if span["op"] == OP.GEN_AI_EXECUTE_TOOL) + + available_tool = { + "name": "simple_test_tool", + "description": "A simple tool", + "params_json_schema": { + "properties": {"message": {"title": "Message", "type": "string"}}, + "required": ["message"], + "title": "simple_test_tool_args", + "type": "object", + "additionalProperties": False, }, + "on_invoke_tool": mock.ANY, + "strict_json_schema": True, + "is_enabled": True, + } + + if parse_version(OPENAI_AGENTS_VERSION) >= (0, 3, 3): + available_tool.update( + {"tool_input_guardrails": None, "tool_output_guardrails": None} + ) + + if parse_version(OPENAI_AGENTS_VERSION) >= ( + 0, + 8, + ): + available_tool["needs_approval"] = False + if parse_version(OPENAI_AGENTS_VERSION) >= ( + 0, + 9, + 0, + ): + available_tool.update( + { + "timeout_seconds": None, + "timeout_behavior": "error_as_result", + "timeout_error_function": None, + } + ) + + assert transaction["transaction"] == "test_agent workflow" + assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" + + assert agent_span["description"] == 
"invoke_agent test_agent" + assert agent_span["origin"] == "auto.ai.openai_agents" + assert agent_span["data"]["gen_ai.agent.name"] == "test_agent" + assert agent_span["data"]["gen_ai.operation.name"] == "invoke_agent" + + agent_span_available_tool = json.loads( + agent_span["data"]["gen_ai.request.available_tools"] + )[0] + assert all(agent_span_available_tool[k] == v for k, v in available_tool.items()) + + assert agent_span["data"]["gen_ai.request.max_tokens"] == 100 + assert agent_span["data"]["gen_ai.request.model"] == "gpt-4" + assert agent_span["data"]["gen_ai.request.temperature"] == 0.7 + assert agent_span["data"]["gen_ai.request.top_p"] == 1.0 + assert agent_span["data"]["gen_ai.system"] == "openai" + + assert ai_client_span1["description"] == "chat gpt-4" + assert ai_client_span1["data"]["gen_ai.operation.name"] == "chat" + assert ai_client_span1["data"]["gen_ai.system"] == "openai" + assert ai_client_span1["data"]["gen_ai.agent.name"] == "test_agent" + + ai_client_span1_available_tool = json.loads( + ai_client_span1["data"]["gen_ai.request.available_tools"] + )[0] + assert all( + ai_client_span1_available_tool[k] == v for k, v in available_tool.items() + ) + + assert ai_client_span1["data"]["gen_ai.request.max_tokens"] == 100 + assert ai_client_span1["data"]["gen_ai.request.messages"] == safe_serialize( + [ + { + "role": "user", + "content": [ + {"type": "text", "text": "Please use the simple test tool"} + ], + }, + ] + ) + assert ai_client_span1["data"]["gen_ai.request.model"] == "gpt-4" + assert ai_client_span1["data"]["gen_ai.request.temperature"] == 0.7 + assert ai_client_span1["data"]["gen_ai.request.top_p"] == 1.0 + assert ai_client_span1["data"]["gen_ai.usage.input_tokens"] == 10 + assert ai_client_span1["data"]["gen_ai.usage.input_tokens.cached"] == 0 + assert ai_client_span1["data"]["gen_ai.usage.output_tokens"] == 5 + assert ai_client_span1["data"]["gen_ai.usage.output_tokens.reasoning"] == 0 + assert 
ai_client_span1["data"]["gen_ai.usage.total_tokens"] == 15 + + tool_call = { + "arguments": '{"message": "hello"}', + "call_id": "call_123", + "name": "simple_test_tool", + "type": "function_call", + "id": "call_123", + "status": None, + } + + if OPENAI_VERSION >= (2, 25, 0): + tool_call["namespace"] = None + + assert json.loads(ai_client_span1["data"]["gen_ai.response.tool_calls"]) == [ + tool_call ] - ) - assert ai_client_span2["data"]["gen_ai.request.model"] == "gpt-4" - assert ai_client_span2["data"]["gen_ai.request.temperature"] == 0.7 - assert ai_client_span2["data"]["gen_ai.request.top_p"] == 1.0 - assert ( - ai_client_span2["data"]["gen_ai.response.text"] - == "Task completed using the tool" - ) - assert ai_client_span2["data"]["gen_ai.system"] == "openai" - assert ai_client_span2["data"]["gen_ai.usage.input_tokens.cached"] == 0 - assert ai_client_span2["data"]["gen_ai.usage.input_tokens"] == 15 - assert ai_client_span2["data"]["gen_ai.usage.output_tokens.reasoning"] == 0 - assert ai_client_span2["data"]["gen_ai.usage.output_tokens"] == 10 - assert ai_client_span2["data"]["gen_ai.usage.total_tokens"] == 25 + + assert tool_span["description"] == "execute_tool simple_test_tool" + assert tool_span["data"]["gen_ai.agent.name"] == "test_agent" + assert tool_span["data"]["gen_ai.operation.name"] == "execute_tool" + + tool_span_available_tool = json.loads( + tool_span["data"]["gen_ai.request.available_tools"] + )[0] + assert all(tool_span_available_tool[k] == v for k, v in available_tool.items()) + + assert tool_span["data"]["gen_ai.request.max_tokens"] == 100 + assert tool_span["data"]["gen_ai.request.model"] == "gpt-4" + assert tool_span["data"]["gen_ai.request.temperature"] == 0.7 + assert tool_span["data"]["gen_ai.request.top_p"] == 1.0 + assert tool_span["data"]["gen_ai.system"] == "openai" + assert tool_span["data"]["gen_ai.tool.description"] == "A simple tool" + assert tool_span["data"]["gen_ai.tool.input"] == '{"message": "hello"}' + assert 
tool_span["data"]["gen_ai.tool.name"] == "simple_test_tool" + assert tool_span["data"]["gen_ai.tool.output"] == "Tool executed with: hello" + assert ai_client_span2["description"] == "chat gpt-4" + assert ai_client_span2["data"]["gen_ai.agent.name"] == "test_agent" + assert ai_client_span2["data"]["gen_ai.operation.name"] == "chat" + + ai_client_span2_available_tool = json.loads( + ai_client_span2["data"]["gen_ai.request.available_tools"] + )[0] + assert all( + ai_client_span2_available_tool[k] == v for k, v in available_tool.items() + ) + + assert ai_client_span2["data"]["gen_ai.request.max_tokens"] == 100 + assert ai_client_span2["data"]["gen_ai.request.messages"] == safe_serialize( + [ + { + "role": "tool", + "content": [ + { + "call_id": "call_123", + "output": "Tool executed with: hello", + "type": "function_call_output", + } + ], + }, + ] + ) + assert ai_client_span2["data"]["gen_ai.request.model"] == "gpt-4" + assert ai_client_span2["data"]["gen_ai.request.temperature"] == 0.7 + assert ai_client_span2["data"]["gen_ai.request.top_p"] == 1.0 + assert ( + ai_client_span2["data"]["gen_ai.response.text"] + == "Task completed using the tool" + ) + assert ai_client_span2["data"]["gen_ai.system"] == "openai" + assert ai_client_span2["data"]["gen_ai.usage.input_tokens.cached"] == 0 + assert ai_client_span2["data"]["gen_ai.usage.input_tokens"] == 15 + assert ai_client_span2["data"]["gen_ai.usage.output_tokens.reasoning"] == 0 + assert ai_client_span2["data"]["gen_ai.usage.output_tokens"] == 10 + assert ai_client_span2["data"]["gen_ai.usage.total_tokens"] == 25 @pytest.mark.asyncio From ec57859f5bfe0a4b50c8993d56a3415564060a1f Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Wed, 13 May 2026 07:42:29 +0200 Subject: [PATCH 69/84] test(langchain): Inline global state --- .../integrations/langchain/test_langchain.py | 100 ++++++++++-------- 1 file changed, 58 insertions(+), 42 deletions(-) diff --git a/tests/integrations/langchain/test_langchain.py 
b/tests/integrations/langchain/test_langchain.py index 240a78e2cc..336be2fb1e 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -237,26 +237,6 @@ def get_word_length(word: str) -> int: return len(word) -global stream_result_mock # type: Mock -global llm_type # type: str - - -class MockOpenAI(ChatOpenAI): - def _stream( - self, - messages: List[BaseMessage], - stop: Optional[List[str]] = None, - run_manager: Optional[CallbackManagerForLLMRun] = None, - **kwargs: Any, - ) -> Iterator[ChatGenerationChunk]: - for x in stream_result_mock(): - yield x - - @property - def _llm_type(self) -> str: - return llm_type - - def test_langchain_text_completion( sentry_init, capture_events, @@ -1488,8 +1468,22 @@ def test_langchain_openai_tools_agent_stream_with_config( def test_langchain_error(sentry_init, capture_events): - global llm_type - llm_type = "acme-llm" + class MockOpenAI(ChatOpenAI): + def _stream( + self, + messages: List[BaseMessage], + stop: Optional[List[str]] = None, + run_manager: Optional[CallbackManagerForLLMRun] = None, + **kwargs: Any, + ) -> Iterator[ChatGenerationChunk]: + stream_result_mock = Mock(side_effect=ValueError("API rate limit error")) + + for x in stream_result_mock(): + yield x + + @property + def _llm_type(self) -> str: + return "acme-llm" sentry_init( integrations=[LangchainIntegration(include_prompts=True)], @@ -1508,8 +1502,6 @@ def test_langchain_error(sentry_init, capture_events): MessagesPlaceholder(variable_name="agent_scratchpad"), ] ) - global stream_result_mock - stream_result_mock = Mock(side_effect=ValueError("API rate limit error")) llm = MockOpenAI( model_name="gpt-3.5-turbo", temperature=0, @@ -1527,8 +1519,22 @@ def test_langchain_error(sentry_init, capture_events): def test_span_status_error(sentry_init, capture_events): - global llm_type - llm_type = "acme-llm" + class MockOpenAI(ChatOpenAI): + def _stream( + self, + messages: List[BaseMessage], + stop: 
Optional[List[str]] = None, + run_manager: Optional[CallbackManagerForLLMRun] = None, + **kwargs: Any, + ) -> Iterator[ChatGenerationChunk]: + stream_result_mock = Mock(side_effect=ValueError("API rate limit error")) + + for x in stream_result_mock(): + yield x + + @property + def _llm_type(self) -> str: + return "acme-llm" sentry_init( integrations=[LangchainIntegration(include_prompts=True)], @@ -1547,8 +1553,6 @@ def test_span_status_error(sentry_init, capture_events): MessagesPlaceholder(variable_name="agent_scratchpad"), ] ) - global stream_result_mock - stream_result_mock = Mock(side_effect=ValueError("API rate limit error")) llm = MockOpenAI( model_name="gpt-3.5-turbo", temperature=0, @@ -1781,8 +1785,32 @@ def test_langchain_callback_list_existing_callback(sentry_init): def test_langchain_message_role_mapping(sentry_init, capture_events): """Test that message roles are properly normalized in langchain integration.""" - global llm_type - llm_type = "openai-chat" + + class MockOpenAI(ChatOpenAI): + def _stream( + self, + messages: List[BaseMessage], + stop: Optional[List[str]] = None, + run_manager: Optional[CallbackManagerForLLMRun] = None, + **kwargs: Any, + ) -> Iterator[ChatGenerationChunk]: + stream_result_mock = Mock( + side_effect=[ + [ + ChatGenerationChunk( + type="ChatGenerationChunk", + message=AIMessageChunk(content="Test response"), + ), + ] + ] + ) + + for x in stream_result_mock(): + yield x + + @property + def _llm_type(self) -> str: + return "openai-chat" sentry_init( integrations=[LangchainIntegration(include_prompts=True)], @@ -1799,18 +1827,6 @@ def test_langchain_message_role_mapping(sentry_init, capture_events): ] ) - global stream_result_mock - stream_result_mock = Mock( - side_effect=[ - [ - ChatGenerationChunk( - type="ChatGenerationChunk", - message=AIMessageChunk(content="Test response"), - ), - ] - ] - ) - llm = MockOpenAI( model_name="gpt-3.5-turbo", temperature=0, From 7886629e3e1d240ab43f146063ca36b4d6c4ec3b Mon Sep 17 00:00:00 
2001 From: Alexander Alderman Webb Date: Wed, 13 May 2026 07:46:45 +0200 Subject: [PATCH 70/84] add parameterization --- tests/integrations/langchain/test_langchain.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index 22b11e83aa..be19e9a790 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -237,6 +237,7 @@ def get_word_length(word: str) -> int: return len(word) +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_langchain_text_completion( sentry_init, capture_events, From b7811723c19ecd7b916614ac63c8451f3b3aeef7 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Wed, 13 May 2026 07:54:11 +0200 Subject: [PATCH 71/84] restore langgraph test --- .../integrations/langgraph/test_langgraph.py | 168 +++++++++++++----- 1 file changed, 127 insertions(+), 41 deletions(-) diff --git a/tests/integrations/langgraph/test_langgraph.py b/tests/integrations/langgraph/test_langgraph.py index c7032b009b..80a20fb617 100644 --- a/tests/integrations/langgraph/test_langgraph.py +++ b/tests/integrations/langgraph/test_langgraph.py @@ -242,6 +242,7 @@ def original_compile(self, *args, **kwargs): assert "calculator" in tools_data +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( "send_default_pii, include_prompts", [ @@ -251,14 +252,21 @@ def original_compile(self, *args, **kwargs): (False, False), ], ) -def test_pregel_invoke(sentry_init, capture_events, send_default_pii, include_prompts): +def test_pregel_invoke( + sentry_init, + capture_events, + capture_items, + send_default_pii, + include_prompts, + stream_gen_ai_spans, +): """Test Pregel.invoke() wrapper creates proper invoke_agent span.""" sentry_init( integrations=[LanggraphIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + 
stream_gen_ai_spans=stream_gen_ai_spans, ) - events = capture_events() test_state = { "messages": [ @@ -289,57 +297,135 @@ def original_invoke(self, *args, **kwargs): ] return {"messages": new_messages} - with start_transaction(): - wrapped_invoke = _wrap_pregel_invoke(original_invoke) - result = wrapped_invoke(pregel, test_state) + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - assert result is not None + with start_transaction(): + wrapped_invoke = _wrap_pregel_invoke(original_invoke) + result = wrapped_invoke(pregel, test_state) - tx = events[0] - assert tx["type"] == "transaction" + assert result is not None - invoke_spans = [ - span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT - ] - assert len(invoke_spans) == 1 + spans = [item.payload for item in items if item.type == "span"] + invoke_spans = [ + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT + ] - invoke_span = invoke_spans[0] - assert invoke_span["description"] == "invoke_agent test_graph" - assert invoke_span["origin"] == "auto.ai.langgraph" - assert invoke_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "invoke_agent" - assert invoke_span["data"][SPANDATA.GEN_AI_PIPELINE_NAME] == "test_graph" - assert invoke_span["data"][SPANDATA.GEN_AI_AGENT_NAME] == "test_graph" - - if send_default_pii and include_prompts: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT in invoke_span["data"] + assert len(invoke_spans) == 1 - request_messages = invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + invoke_span = invoke_spans[0] - if isinstance(request_messages, str): - import json + assert invoke_span["name"] == "invoke_agent test_graph" + assert invoke_span["attributes"]["sentry.origin"] == "auto.ai.langgraph" + assert ( + invoke_span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "invoke_agent" + ) + assert invoke_span["attributes"][SPANDATA.GEN_AI_PIPELINE_NAME] == 
"test_graph" + assert invoke_span["attributes"][SPANDATA.GEN_AI_AGENT_NAME] == "test_graph" - request_messages = json.loads(request_messages) - assert len(request_messages) == 1 - assert request_messages[0]["content"] == "Of course! How can I assist you?" + if send_default_pii and include_prompts: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT in invoke_span["attributes"] - response_text = invoke_span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] - assert response_text == expected_assistant_response + request_messages = invoke_span["attributes"][ + SPANDATA.GEN_AI_REQUEST_MESSAGES + ] - assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in invoke_span["data"] - tool_calls_data = invoke_span["data"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS] - if isinstance(tool_calls_data, str): - import json + if isinstance(request_messages, str): + import json + + request_messages = json.loads(request_messages) + assert len(request_messages) == 1 + assert request_messages[0]["content"] == "Hello, can you help me?" + assert request_messages[1]["content"] == "Of course! How can I assist you?" 
+ + response_text = invoke_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] + assert response_text == expected_assistant_response + + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in invoke_span["attributes"] + tool_calls_data = invoke_span["attributes"][ + SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS + ] + + if isinstance(tool_calls_data, str): + import json - tool_calls_data = json.loads(tool_calls_data) + tool_calls_data = json.loads(tool_calls_data) - assert len(tool_calls_data) == 1 - assert tool_calls_data[0]["id"] == "call_test_123" - assert tool_calls_data[0]["function"]["name"] == "search_tool" + assert len(tool_calls_data) == 1 + assert tool_calls_data[0]["id"] == "call_test_123" + assert tool_calls_data[0]["function"]["name"] == "search_tool" + else: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in invoke_span.get( + "attributes", {} + ) + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in invoke_span.get( + "attributes", {} + ) + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in invoke_span.get( + "attributes", {} + ) else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in invoke_span.get("data", {}) - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in invoke_span.get("data", {}) - assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in invoke_span.get("data", {}) + events = capture_events() + + with start_transaction(): + wrapped_invoke = _wrap_pregel_invoke(original_invoke) + result = wrapped_invoke(pregel, test_state) + + assert result is not None + + tx = events[0] + assert tx["type"] == "transaction" + + invoke_spans = [ + span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT + ] + + assert len(invoke_spans) == 1 + + invoke_span = invoke_spans[0] + + assert invoke_span["description"] == "invoke_agent test_graph" + assert invoke_span["origin"] == "auto.ai.langgraph" + assert invoke_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "invoke_agent" + assert invoke_span["data"][SPANDATA.GEN_AI_PIPELINE_NAME] == "test_graph" + assert 
invoke_span["data"][SPANDATA.GEN_AI_AGENT_NAME] == "test_graph" + + if send_default_pii and include_prompts: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT in invoke_span["data"] + + request_messages = invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + + if isinstance(request_messages, str): + import json + + request_messages = json.loads(request_messages) + assert len(request_messages) == 1 + assert request_messages[0]["content"] == "Of course! How can I assist you?" + + response_text = invoke_span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + assert response_text == expected_assistant_response + + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in invoke_span["data"] + tool_calls_data = invoke_span["data"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS] + + if isinstance(tool_calls_data, str): + import json + + tool_calls_data = json.loads(tool_calls_data) + + assert len(tool_calls_data) == 1 + assert tool_calls_data[0]["id"] == "call_test_123" + assert tool_calls_data[0]["function"]["name"] == "search_tool" + else: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in invoke_span.get("data", {}) + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in invoke_span.get("data", {}) + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in invoke_span.get( + "data", {} + ) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) From b618cc8be08ea91f7c01cf21d36c52d490da663a Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Wed, 13 May 2026 08:13:01 +0200 Subject: [PATCH 72/84] update test --- tests/integrations/langgraph/test_langgraph.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integrations/langgraph/test_langgraph.py b/tests/integrations/langgraph/test_langgraph.py index 80a20fb617..f308127276 100644 --- a/tests/integrations/langgraph/test_langgraph.py +++ b/tests/integrations/langgraph/test_langgraph.py @@ -337,7 +337,7 @@ def original_invoke(self, *args, **kwargs): import json request_messages = 
json.loads(request_messages) - assert len(request_messages) == 1 + assert len(request_messages) == 2 assert request_messages[0]["content"] == "Hello, can you help me?" assert request_messages[1]["content"] == "Of course! How can I assist you?" From e85dffe2ab7c7495170f998817f55dba3c737a04 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Wed, 13 May 2026 09:58:22 +0200 Subject: [PATCH 73/84] remove None conversion --- sentry_sdk/client.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/sentry_sdk/client.py b/sentry_sdk/client.py index d198b7f854..39d8a4dea4 100644 --- a/sentry_sdk/client.py +++ b/sentry_sdk/client.py @@ -149,12 +149,6 @@ def _serialized_v1_attribute_to_serialized_v2_attribute( "type": "string", } - if attribute_value is None: - return { - "value": "None", - "type": "string", - } - return None From f8f98c16f38abba9dd0d0a042c054636c17ea303 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Wed, 13 May 2026 10:01:57 +0200 Subject: [PATCH 74/84] update test with None attribute assertion --- tests/integrations/openai_agents/test_openai_agents.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py index 9cdb3ea6f8..bf44562b14 100644 --- a/tests/integrations/openai_agents/test_openai_agents.py +++ b/tests/integrations/openai_agents/test_openai_agents.py @@ -3140,7 +3140,6 @@ async def test_mcp_tool_execution_with_error( assert mcp_tool_span["name"] == "execute_tool failing_mcp_tool" assert mcp_tool_span["attributes"]["gen_ai.tool.name"] == "failing_mcp_tool" assert mcp_tool_span["attributes"]["gen_ai.tool.input"] == '{"query": "test"}' - assert mcp_tool_span["attributes"]["gen_ai.tool.output"] == "None" # Verify error status was set assert mcp_tool_span["status"] == "error" From b46fd5f087f1b0203b054b675f45b27742ad6bd7 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Wed, 13 May 2026 10:06:41 +0200 Subject: 
[PATCH 75/84] mostly whitespace test cleanup --- .../integrations/anthropic/test_anthropic.py | 3 - .../google_genai/test_google_genai.py | 29 +- .../huggingface_hub/test_huggingface_hub.py | 1 - .../integrations/langchain/test_langchain.py | 49 -- .../integrations/langgraph/test_langgraph.py | 35 -- tests/integrations/litellm/test_litellm.py | 27 - tests/integrations/openai/test_openai.py | 487 +++++++++++------- .../pydantic_ai/test_pydantic_ai.py | 25 +- 8 files changed, 319 insertions(+), 337 deletions(-) diff --git a/tests/integrations/anthropic/test_anthropic.py b/tests/integrations/anthropic/test_anthropic.py index f9f6241997..31f487aef2 100644 --- a/tests/integrations/anthropic/test_anthropic.py +++ b/tests/integrations/anthropic/test_anthropic.py @@ -3675,7 +3675,6 @@ def test_anthropic_message_truncation( assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["attributes"] messages_data = chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert isinstance(messages_data, str) parsed_messages = json.loads(messages_data) @@ -3760,7 +3759,6 @@ async def test_anthropic_message_truncation_async( for span in spans if span["attributes"].get("sentry.op") == OP.GEN_AI_CHAT ] - assert len(chat_spans) > 0 chat_span = chat_spans[0] @@ -3794,7 +3792,6 @@ async def test_anthropic_message_truncation_async( chat_spans = [ span for span in tx.get("spans", []) if span.get("op") == OP.GEN_AI_CHAT ] - assert len(chat_spans) > 0 chat_span = chat_spans[0] diff --git a/tests/integrations/google_genai/test_google_genai.py b/tests/integrations/google_genai/test_google_genai.py index 94bfea91fd..79318eaea5 100644 --- a/tests/integrations/google_genai/test_google_genai.py +++ b/tests/integrations/google_genai/test_google_genai.py @@ -427,7 +427,6 @@ def get_weather(location: str) -> str: tools_data_str = invoke_span["attributes"][ SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS ] - # Parse the JSON string to verify content tools_data = json.loads(tools_data_str) assert 
len(tools_data) == 2 @@ -452,7 +451,6 @@ def get_weather(location: str) -> str: # Check that tools are recorded (data is serialized as a string) tools_data_str = invoke_span["data"][SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS] - # Parse the JSON string to verify content tools_data = json.loads(tools_data_str) assert len(tools_data) == 2 @@ -701,14 +699,10 @@ def test_streaming_generate_content( # Verify accumulated response text (all chunks combined) expected_full_text = "Hello! How can I help you today?" # Response text is stored as a JSON string - if stream_gen_ai_spans: - chat_response_text = json.loads( - chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] - ) - else: - chat_response_text = json.loads( - chat_span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] - ) + chat_response_text = json.loads( + chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] + ) + assert chat_response_text == [expected_full_text] # Verify finish reasons (only the final chunk has a finish reason) @@ -762,14 +756,9 @@ def test_streaming_generate_content( # Verify accumulated response text (all chunks combined) expected_full_text = "Hello! How can I help you today?" 
# Response text is stored as a JSON string - if stream_gen_ai_spans: - chat_response_text = json.loads( - chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] - ) - else: - chat_response_text = json.loads( - chat_span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] - ) + chat_response_text = json.loads( + chat_span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + ) assert chat_response_text == [expected_full_text] # Verify finish reasons (only the final chunk has a finish reason) @@ -1592,7 +1581,6 @@ def test_embed_content( (event,) = events assert event["type"] == "transaction" - assert event["transaction"] == "google_genai_embeddings" # Should have 1 span for embeddings @@ -1697,7 +1685,6 @@ def test_embed_content_string_input( # Check that single string is handled correctly input_texts = json.loads(embed_span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) - assert input_texts == ["Single text input"] # Should use token_count from statistics (5), not billable_character_count (10) # Note: Only available in newer versions with ContentEmbeddingStatistics @@ -2064,7 +2051,6 @@ async def test_async_embed_content_string_input( input_texts = json.loads( embed_span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] ) - assert input_texts == ["Single text input"] # Should use token_count from statistics (5), not billable_character_count (10) # Note: Only available in newer versions with ContentEmbeddingStatistics @@ -2088,7 +2074,6 @@ async def test_async_embed_content_string_input( # Check that single string is handled correctly input_texts = json.loads(embed_span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) - assert input_texts == ["Single text input"] # Should use token_count from statistics (5), not billable_character_count (10) # Note: Only available in newer versions with ContentEmbeddingStatistics diff --git a/tests/integrations/huggingface_hub/test_huggingface_hub.py b/tests/integrations/huggingface_hub/test_huggingface_hub.py index dc9d7925ff..5417cec250 100644 --- 
a/tests/integrations/huggingface_hub/test_huggingface_hub.py +++ b/tests/integrations/huggingface_hub/test_huggingface_hub.py @@ -938,7 +938,6 @@ def test_chat_completion_streaming( "thread.id": mock.ANY, "thread.name": mock.ANY, } - # usage is not available in older versions of the library if HF_VERSION and HF_VERSION >= (0, 26, 0): expected_data["gen_ai.usage.input_tokens"] = 183 diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index be19e9a790..79ecc7e96b 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -305,11 +305,9 @@ def test_langchain_text_completion( for span in spans if span["attributes"].get("sentry.op") == "gen_ai.text_completion" ] - assert len(llm_spans) > 0 llm_span = llm_spans[0] - assert llm_span["name"] == "text_completion gpt-3.5-turbo" assert llm_span["attributes"]["gen_ai.system"] == "openai" assert llm_span["attributes"]["gen_ai.function_id"] == "my-snazzy-pipeline" @@ -340,11 +338,9 @@ def test_langchain_text_completion( for span in tx.get("spans", []) if span.get("op") == "gen_ai.text_completion" ] - assert len(llm_spans) > 0 llm_span = llm_spans[0] - assert llm_span["description"] == "text_completion gpt-3.5-turbo" assert llm_span["data"]["gen_ai.system"] == "openai" assert llm_span["data"]["gen_ai.function_id"] == "my-snazzy-pipeline" @@ -587,7 +583,6 @@ def test_langchain_create_agent( ) tx = next(item.payload for item in items if item.type == "transaction") - assert tx["type"] == "transaction" assert tx["contexts"]["trace"]["origin"] == "manual" @@ -662,7 +657,6 @@ def test_langchain_create_agent( ) tx = events[0] - assert tx["type"] == "transaction" assert tx["contexts"]["trace"]["origin"] == "manual" @@ -826,7 +820,6 @@ def test_tool_execution_span( ) tx = next(item.payload for item in items if item.type == "transaction") - assert tx["type"] == "transaction" assert tx["contexts"]["trace"]["origin"] == "manual" 
@@ -839,7 +832,6 @@ def test_tool_execution_span( tool_exec_spans = list( x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.execute_tool" ) - assert len(tool_exec_spans) == 1 tool_exec_span = tool_exec_spans[0] @@ -934,13 +926,11 @@ def test_tool_execution_span( ) tx = events[0] - assert tx["type"] == "transaction" assert tx["contexts"]["trace"]["origin"] == "manual" chat_spans = list(x for x in tx["spans"] if x["op"] == "gen_ai.chat") assert len(chat_spans) == 2 - tool_exec_spans = list( x for x in tx["spans"] if x["op"] == "gen_ai.execute_tool" ) @@ -1091,7 +1081,6 @@ def test_langchain_openai_tools_agent_no_prompts( ) tx = next(item.payload for item in items if item.type == "transaction") - assert tx["type"] == "transaction" assert tx["contexts"]["trace"]["origin"] == "manual" @@ -1200,7 +1189,6 @@ def test_langchain_openai_tools_agent_no_prompts( ) tx = events[0] - assert tx["type"] == "transaction" assert tx["contexts"]["trace"]["origin"] == "manual" @@ -1358,7 +1346,6 @@ def test_langchain_openai_tools_agent( list(agent_executor.stream({"input": "How many letters in the word eudca"})) tx = next(item.payload for item in items if item.type == "transaction") - assert tx["type"] == "transaction" assert tx["contexts"]["trace"]["origin"] == "manual" @@ -1474,7 +1461,6 @@ def test_langchain_openai_tools_agent( list(agent_executor.stream({"input": "How many letters in the word eudca"})) tx = events[0] - assert tx["type"] == "transaction" assert tx["contexts"]["trace"]["origin"] == "manual" @@ -1641,7 +1627,6 @@ def test_langchain_openai_tools_agent_with_config( ) tx = next(item.payload for item in items if item.type == "transaction") - assert tx["type"] == "transaction" assert tx["contexts"]["trace"]["origin"] == "manual" @@ -1668,7 +1653,6 @@ def test_langchain_openai_tools_agent_with_config( ) tx = events[0] - assert tx["type"] == "transaction" assert tx["contexts"]["trace"]["origin"] == "manual" @@ -1761,7 +1745,6 @@ def 
test_langchain_openai_tools_agent_stream_no_prompts( ) tx = next(item.payload for item in items if item.type == "transaction") - assert tx["type"] == "transaction" assert tx["contexts"]["trace"]["origin"] == "manual" @@ -1872,7 +1855,6 @@ def test_langchain_openai_tools_agent_stream_no_prompts( ) tx = events[0] - assert tx["type"] == "transaction" assert tx["contexts"]["trace"]["origin"] == "manual" @@ -2035,7 +2017,6 @@ def test_langchain_openai_tools_agent_stream( ) tx = next(item.payload for item in items if item.type == "transaction") - assert tx["type"] == "transaction" assert tx["contexts"]["trace"]["origin"] == "manual" @@ -2161,7 +2142,6 @@ def test_langchain_openai_tools_agent_stream( ) tx = events[0] - assert tx["type"] == "transaction" assert tx["contexts"]["trace"]["origin"] == "manual" @@ -2329,7 +2309,6 @@ def test_langchain_openai_tools_agent_stream_with_config( ) tx = next(item.payload for item in items if item.type == "transaction") - assert tx["type"] == "transaction" assert tx["contexts"]["trace"]["origin"] == "manual" @@ -2356,7 +2335,6 @@ def test_langchain_openai_tools_agent_stream_with_config( ) tx = events[0] - assert tx["type"] == "transaction" assert tx["contexts"]["trace"]["origin"] == "manual" @@ -2496,10 +2474,8 @@ def _llm_type(self) -> str: (error,) = (item.payload for item in items if item.type == "event") assert error["level"] == "error" - spans = [item.payload for item in items if item.type == "span"] assert spans[0]["status"] == "error" - (transaction,) = (item.payload for item in items if item.type == "transaction") else: events = capture_events() @@ -3164,7 +3140,6 @@ def test_langchain_embeddings_sync( for span in spans if span["attributes"].get("sentry.op") == "gen_ai.embeddings" ] - assert len(embeddings_spans) == 1 embeddings_span = embeddings_spans[0] @@ -3226,7 +3201,6 @@ def test_langchain_embeddings_sync( for span in tx.get("spans", []) if span.get("op") == "gen_ai.embeddings" ] - assert len(embeddings_spans) == 1 
embeddings_span = embeddings_spans[0] @@ -3311,11 +3285,9 @@ def test_langchain_embeddings_embed_query( for span in spans if span["attributes"].get("sentry.op") == "gen_ai.embeddings" ] - assert len(embeddings_spans) == 1 embeddings_span = embeddings_spans[0] - assert embeddings_span["attributes"]["gen_ai.operation.name"] == "embeddings" assert ( embeddings_span["attributes"]["gen_ai.request.model"] @@ -3369,11 +3341,9 @@ def test_langchain_embeddings_embed_query( for span in tx.get("spans", []) if span.get("op") == "gen_ai.embeddings" ] - assert len(embeddings_spans) == 1 embeddings_span = embeddings_spans[0] - assert embeddings_span["data"]["gen_ai.operation.name"] == "embeddings" assert ( embeddings_span["data"]["gen_ai.request.model"] == "text-embedding-ada-002" @@ -3458,7 +3428,6 @@ async def mock_aembed_documents(self, texts): for span in spans if span["attributes"].get("sentry.op") == "gen_ai.embeddings" ] - assert len(embeddings_spans) == 1 embeddings_span = embeddings_spans[0] @@ -3524,7 +3493,6 @@ async def mock_aembed_documents(self, texts): for span in tx.get("spans", []) if span.get("op") == "gen_ai.embeddings" ] - assert len(embeddings_spans) == 1 embeddings_span = embeddings_spans[0] @@ -3607,7 +3575,6 @@ async def mock_aembed_query(self, text): for span in spans if span["attributes"].get("sentry.op") == "gen_ai.embeddings" ] - assert len(embeddings_spans) == 1 embeddings_span = embeddings_spans[0] @@ -3653,7 +3620,6 @@ async def mock_aembed_query(self, text): for span in tx.get("spans", []) if span.get("op") == "gen_ai.embeddings" ] - assert len(embeddings_spans) == 1 embeddings_span = embeddings_spans[0] @@ -3719,7 +3685,6 @@ def test_langchain_embeddings_no_model_name( for span in spans if span["attributes"].get("sentry.op") == "gen_ai.embeddings" ] - assert len(embeddings_spans) == 1 embeddings_span = embeddings_spans[0] @@ -3762,7 +3727,6 @@ def test_langchain_embeddings_no_model_name( for span in tx.get("spans", []) if span.get("op") == 
"gen_ai.embeddings" ] - assert len(embeddings_spans) == 1 embeddings_span = embeddings_spans[0] @@ -3900,7 +3864,6 @@ def test_langchain_embeddings_multiple_providers( for span in spans if span["attributes"].get("sentry.op") == "gen_ai.embeddings" ] - # Should have 2 spans, one for each provider assert len(embeddings_spans) == 2 @@ -3951,7 +3914,6 @@ def test_langchain_embeddings_multiple_providers( for span in tx.get("spans", []) if span.get("op") == "gen_ai.embeddings" ] - # Should have 2 spans, one for each provider assert len(embeddings_spans) == 2 @@ -4054,7 +4016,6 @@ def test_langchain_embeddings_multiple_calls( for span in spans if span["attributes"].get("sentry.op") == "gen_ai.embeddings" ] - assert len(embeddings_spans) == 3 # Verify all spans have proper data @@ -4109,7 +4070,6 @@ def test_langchain_embeddings_multiple_calls( for span in tx.get("spans", []) if span.get("op") == "gen_ai.embeddings" ] - assert len(embeddings_spans) == 3 # Verify all spans have proper data @@ -4173,9 +4133,7 @@ def test_langchain_embeddings_span_hierarchy( for span in spans if span["attributes"].get("sentry.op") == "gen_ai.embeddings" ] - tx = next(item.payload for item in items if item.type == "transaction") - custom_spans = [ span for span in tx.get("spans", []) if span.get("op") == "custom" ] @@ -4220,7 +4178,6 @@ def test_langchain_embeddings_span_hierarchy( for span in tx.get("spans", []) if span.get("op") == "gen_ai.embeddings" ] - custom_spans = [ span for span in tx.get("spans", []) if span.get("op") == "custom" ] @@ -4290,7 +4247,6 @@ def test_langchain_embeddings_with_list_and_string_inputs( for span in spans if span["attributes"].get("sentry.op") == "gen_ai.embeddings" ] - assert len(embeddings_spans) == 2 # Both should have input data captured as lists @@ -4342,7 +4298,6 @@ def test_langchain_embeddings_with_list_and_string_inputs( for span in tx.get("spans", []) if span.get("op") == "gen_ai.embeddings" ] - assert len(embeddings_spans) == 2 # Both should have 
input data captured as lists @@ -4412,11 +4367,9 @@ def test_langchain_response_model_extraction( for span in spans if span["attributes"].get("sentry.op") == "gen_ai.text_completion" ] - assert len(llm_spans) > 0 llm_span = llm_spans[0] - assert llm_span["attributes"]["gen_ai.operation.name"] == "text_completion" if expected_model is not None: @@ -4455,11 +4408,9 @@ def test_langchain_response_model_extraction( for span in tx.get("spans", []) if span.get("op") == "gen_ai.text_completion" ] - assert len(llm_spans) > 0 llm_span = llm_spans[0] - assert llm_span["data"]["gen_ai.operation.name"] == "text_completion" if expected_model is not None: diff --git a/tests/integrations/langgraph/test_langgraph.py b/tests/integrations/langgraph/test_langgraph.py index 6dd5c3cace..f8df60739f 100644 --- a/tests/integrations/langgraph/test_langgraph.py +++ b/tests/integrations/langgraph/test_langgraph.py @@ -186,7 +186,6 @@ def original_compile(self, *args, **kwargs): for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CREATE_AGENT ] - assert len(agent_spans) == 1 agent_span = agent_spans[0] @@ -200,7 +199,6 @@ def original_compile(self, *args, **kwargs): assert SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS in agent_span["attributes"] tools_data = agent_span["attributes"][SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS] - assert tools_data == ["search_tool", "calculator"] assert len(tools_data) == 2 assert "search_tool" in tools_data @@ -223,7 +221,6 @@ def original_compile(self, *args, **kwargs): agent_spans = [ span for span in tx["spans"] if span["op"] == OP.GEN_AI_CREATE_AGENT ] - assert len(agent_spans) == 1 agent_span = agent_spans[0] @@ -312,11 +309,9 @@ def original_invoke(self, *args, **kwargs): for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT ] - assert len(invoke_spans) == 1 invoke_span = invoke_spans[0] - assert invoke_span["name"] == "invoke_agent test_graph" assert invoke_span["attributes"]["sentry.origin"] == "auto.ai.langgraph" assert ( 
@@ -381,11 +376,9 @@ def original_invoke(self, *args, **kwargs): invoke_spans = [ span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT ] - assert len(invoke_spans) == 1 invoke_span = invoke_spans[0] - assert invoke_span["description"] == "invoke_agent test_graph" assert invoke_span["origin"] == "auto.ai.langgraph" assert invoke_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "invoke_agent" @@ -494,11 +487,9 @@ async def run_test(): for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT ] - assert len(invoke_spans) == 1 invoke_span = invoke_spans[0] - assert invoke_span["name"] == "invoke_agent async_graph" assert invoke_span["attributes"]["sentry.origin"] == "auto.ai.langgraph" assert ( @@ -549,11 +540,9 @@ async def run_test(): invoke_spans = [ span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT ] - assert len(invoke_spans) == 1 invoke_span = invoke_spans[0] - assert invoke_span["description"] == "invoke_agent async_graph" assert invoke_span["origin"] == "auto.ai.langgraph" assert invoke_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "invoke_agent" @@ -622,7 +611,6 @@ def original_invoke(self, *args, **kwargs): for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT ] - assert len(invoke_spans) == 1 invoke_span = invoke_spans[0] @@ -640,7 +628,6 @@ def original_invoke(self, *args, **kwargs): invoke_spans = [ span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT ] - assert len(invoke_spans) == 1 invoke_span = invoke_spans[0] @@ -687,7 +674,6 @@ async def run_error_test(): for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT ] - assert len(invoke_spans) == 1 invoke_span = invoke_spans[0] @@ -701,7 +687,6 @@ async def run_error_test(): invoke_spans = [ span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT ] - assert len(invoke_spans) == 1 invoke_span = invoke_spans[0] @@ -797,7 +782,6 @@ def original_invoke(self, *args, **kwargs): 
for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT ] - assert len(invoke_spans) == 1 invoke_span = invoke_spans[0] @@ -825,7 +809,6 @@ def original_invoke(self, *args, **kwargs): invoke_spans = [ span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT ] - assert len(invoke_spans) == 1 invoke_span = invoke_spans[0] @@ -909,7 +892,6 @@ def original_invoke(self, *args, **kwargs): for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT ] - assert len(invoke_spans) == 1 invoke_agent_span = invoke_spans[0] @@ -939,7 +921,6 @@ def original_invoke(self, *args, **kwargs): invoke_spans = [ span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT ] - assert len(invoke_spans) == 1 invoke_agent_span = invoke_spans[0] @@ -1028,7 +1009,6 @@ async def run_test(): for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT ] - assert len(invoke_spans) == 1 invoke_agent_span = invoke_spans[0] @@ -1055,7 +1035,6 @@ async def run_test(): invoke_spans = [ span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT ] - assert len(invoke_spans) == 1 invoke_agent_span = invoke_spans[0] @@ -1152,7 +1131,6 @@ def original_invoke(self, *args, **kwargs): for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT ] - assert len(invoke_spans) == 1 invoke_agent_span = invoke_spans[0] @@ -1176,7 +1154,6 @@ def original_invoke(self, *args, **kwargs): invoke_spans = [ span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT ] - assert len(invoke_spans) == 1 invoke_agent_span = invoke_spans[0] @@ -1270,7 +1247,6 @@ async def run_test(): for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT ] - assert len(invoke_spans) == 1 invoke_agent_span = invoke_spans[0] @@ -1291,7 +1267,6 @@ async def run_test(): invoke_spans = [ span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT ] - assert len(invoke_spans) == 1 invoke_agent_span = 
invoke_spans[0] @@ -1371,7 +1346,6 @@ def original_invoke(self, *args, **kwargs): for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT ] - assert len(invoke_spans) == 1 invoke_agent_span = invoke_spans[0] @@ -1398,7 +1372,6 @@ def original_invoke(self, *args, **kwargs): invoke_spans = [ span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT ] - assert len(invoke_spans) == 1 invoke_agent_span = invoke_spans[0] @@ -1483,7 +1456,6 @@ async def run_test(): for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT ] - assert len(invoke_spans) == 1 invoke_agent_span = invoke_spans[0] @@ -1507,7 +1479,6 @@ async def run_test(): invoke_spans = [ span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT ] - assert len(invoke_spans) == 1 invoke_agent_span = invoke_spans[0] @@ -1602,7 +1573,6 @@ def original_invoke(self, *args, **kwargs): for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT ] - assert len(invoke_spans) == 1 invoke_agent_span = invoke_spans[0] @@ -1628,7 +1598,6 @@ def original_invoke(self, *args, **kwargs): invoke_spans = [ span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT ] - assert len(invoke_spans) == 1 invoke_agent_span = invoke_spans[0] @@ -1725,7 +1694,6 @@ async def run_test(): for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT ] - assert len(invoke_spans) == 1 invoke_agent_span = invoke_spans[0] @@ -1748,7 +1716,6 @@ async def run_test(): invoke_spans = [ span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT ] - assert len(invoke_spans) == 1 invoke_agent_span = invoke_spans[0] @@ -1870,7 +1837,6 @@ def original_invoke(self, *args, **kwargs): for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT ] - assert len(invoke_spans) == 1 invoke_span = invoke_spans[0] @@ -1895,7 +1861,6 @@ def original_invoke(self, *args, **kwargs): invoke_spans = [ span for span in tx["spans"] if 
span["op"] == OP.GEN_AI_INVOKE_AGENT ] - assert len(invoke_spans) == 1 invoke_span = invoke_spans[0] diff --git a/tests/integrations/litellm/test_litellm.py b/tests/integrations/litellm/test_litellm.py index a0120cd7b5..22663f9472 100644 --- a/tests/integrations/litellm/test_litellm.py +++ b/tests/integrations/litellm/test_litellm.py @@ -688,7 +688,6 @@ def test_embeddings_create( # Response is processed by litellm, so just check it exists assert response is not None - spans = [item.payload for item in items if item.type == "span"] spans = list( x @@ -710,7 +709,6 @@ def test_embeddings_create( ) # Check that embeddings input is captured (it's JSON serialized) embeddings_input = span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] - assert json.loads(embeddings_input) == ["Hello, world!"] else: events = capture_events() @@ -731,7 +729,6 @@ def test_embeddings_create( # Response is processed by litellm, so just check it exists assert response is not None - assert len(events) == 1 (event,) = events @@ -808,7 +805,6 @@ async def test_async_embeddings_create( # Response is processed by litellm, so just check it exists assert response is not None - spans = [item.payload for item in items if item.type == "span"] spans = list( x @@ -816,7 +812,6 @@ async def test_async_embeddings_create( if x["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) - assert len(spans) == 1 span = spans[0] @@ -852,7 +847,6 @@ async def test_async_embeddings_create( # Response is processed by litellm, so just check it exists assert response is not None - assert len(events) == 1 (event,) = events @@ -930,7 +924,6 @@ def test_embeddings_create_with_list_input( if x["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) - assert len(spans) == 1 span = spans[0] @@ -963,7 +956,6 @@ def test_embeddings_create_with_list_input( # Response is processed by litellm, so just check it exists 
assert response is not None - assert len(events) == 1 (event,) = events @@ -1034,7 +1026,6 @@ async def test_async_embeddings_create_with_list_input( # Response is processed by litellm, so just check it exists assert response is not None - spans = [item.payload for item in items if item.type == "span"] spans = list( x @@ -1042,7 +1033,6 @@ async def test_async_embeddings_create_with_list_input( if x["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) - assert len(spans) == 1 span = spans[0] @@ -1076,7 +1066,6 @@ async def test_async_embeddings_create_with_list_input( # Response is processed by litellm, so just check it exists assert response is not None - assert len(events) == 1 (event,) = events @@ -1145,7 +1134,6 @@ def test_embeddings_no_pii( # Response is processed by litellm, so just check it exists assert response is not None - spans = [item.payload for item in items if item.type == "span"] spans = list( x @@ -1153,7 +1141,6 @@ def test_embeddings_no_pii( if x["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) - assert len(spans) == 1 span = spans[0] @@ -1179,7 +1166,6 @@ def test_embeddings_no_pii( # Response is processed by litellm, so just check it exists assert response is not None - assert len(events) == 1 (event,) = events @@ -1251,7 +1237,6 @@ async def test_async_embeddings_no_pii( if x["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) - assert len(spans) == 1 span = spans[0] @@ -1278,7 +1263,6 @@ async def test_async_embeddings_no_pii( # Response is processed by litellm, so just check it exists assert response is not None - assert len(events) == 1 (event,) = events @@ -2391,7 +2375,6 @@ def test_litellm_message_truncation( for span in spans if span["attributes"].get("sentry.op") == OP.GEN_AI_CHAT ] - assert len(chat_spans) > 0 chat_span = chat_spans[0] @@ -2873,7 +2856,6 @@ 
async def test_async_binary_content_encoding_mixed_content( if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) - assert len(chat_spans) == 1 span = chat_spans[0] messages_data = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) @@ -2901,7 +2883,6 @@ async def test_async_binary_content_encoding_mixed_content( for x in event["spans"] if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" ) - assert len(chat_spans) == 1 span = chat_spans[0] messages_data = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) @@ -2982,10 +2963,8 @@ def test_binary_content_encoding_uri_type( if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) - assert len(chat_spans) == 1 span = chat_spans[0] - messages_data = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) else: events = capture_events() @@ -3010,10 +2989,8 @@ def test_binary_content_encoding_uri_type( for x in event["spans"] if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" ) - assert len(chat_spans) == 1 span = chat_spans[0] - messages_data = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) uri_item = next( @@ -3101,10 +3078,8 @@ async def test_async_binary_content_encoding_uri_type( if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) - assert len(chat_spans) == 1 span = chat_spans[0] - messages_data = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) else: events = capture_events() @@ -3130,10 +3105,8 @@ async def test_async_binary_content_encoding_uri_type( for x in event["spans"] if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" ) - assert len(chat_spans) == 1 span = chat_spans[0] - messages_data = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) uri_item = next( diff --git a/tests/integrations/openai/test_openai.py 
b/tests/integrations/openai/test_openai.py index af0932eeb9..6c113078a3 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -165,7 +165,6 @@ def test_nonstreaming_chat_completion_no_prompts( ) assert response == "the model response" - span = next(item.payload for item in items if item.type == "span") assert span["attributes"]["sentry.op"] == "gen_ai.chat" assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" @@ -207,7 +206,6 @@ def test_nonstreaming_chat_completion_no_prompts( ) assert response == "the model response" - tx = events[0] assert tx["type"] == "transaction" span = tx["spans"][0] @@ -325,7 +323,6 @@ def test_nonstreaming_chat_completion( ) assert response == "the model response" - span = next(item.payload for item in items if item.type == "span") assert span["attributes"]["sentry.op"] == "gen_ai.chat" assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" @@ -387,7 +384,6 @@ def test_nonstreaming_chat_completion( ) assert response == "the model response" - tx = events[0] assert tx["type"] == "transaction" span = tx["spans"][0] @@ -490,7 +486,6 @@ async def test_nonstreaming_chat_completion_async_no_prompts( response = response.choices[0].message.content assert response == "the model response" - span = next(item.payload for item in items if item.type == "span") assert span["attributes"]["sentry.op"] == "gen_ai.chat" assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" @@ -529,7 +524,6 @@ async def test_nonstreaming_chat_completion_async_no_prompts( response = response.choices[0].message.content assert response == "the model response" - tx = events[0] assert tx["type"] == "transaction" span = tx["spans"][0] @@ -645,7 +639,6 @@ async def test_nonstreaming_chat_completion_async( response = response.choices[0].message.content assert response == "the model response" - span = next(item.payload for item in items if item.type == "span") assert span["attributes"]["sentry.op"] == 
"gen_ai.chat" assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" @@ -704,7 +697,6 @@ async def test_nonstreaming_chat_completion_async( response = response.choices[0].message.content assert response == "the model response" - tx = events[0] assert tx["type"] == "transaction" span = tx["spans"][0] @@ -862,7 +854,6 @@ def test_streaming_chat_completion_no_prompts( ) assert response_string == "hello world" - span = next(item.payload for item in items if item.type == "span") assert span["attributes"]["sentry.op"] == "gen_ai.chat" assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" @@ -915,7 +906,6 @@ def test_streaming_chat_completion_no_prompts( ) assert response_string == "hello world" - tx = events[0] assert tx["type"] == "transaction" span = tx["spans"][0] @@ -1488,9 +1478,7 @@ def test_streaming_chat_completion( response_string = "".join( map(lambda x: x.choices[0].delta.content, response_stream) ) - assert response_string == "hello world" - span = next(item.payload for item in items if item.type == "span") assert span["attributes"]["sentry.op"] == "gen_ai.chat" assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" @@ -1567,9 +1555,7 @@ def test_streaming_chat_completion( response_string = "".join( map(lambda x: x.choices[0].delta.content, response_stream) ) - assert response_string == "hello world" - tx = events[0] assert tx["type"] == "transaction" span = tx["spans"][0] @@ -1736,7 +1722,6 @@ async def test_streaming_chat_completion_async_no_prompts( response_string += x.choices[0].delta.content assert response_string == "hello world" - span = next(item.payload for item in items if item.type == "span") assert span["attributes"]["sentry.op"] == "gen_ai.chat" assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" @@ -1791,7 +1776,6 @@ async def test_streaming_chat_completion_async_no_prompts( response_string += x.choices[0].delta.content assert response_string == "hello world" - tx = events[0] assert tx["type"] == "transaction" 
span = tx["spans"][0] @@ -1967,7 +1951,6 @@ async def test_streaming_chat_completion_async( response_string += x.choices[0].delta.content assert response_string == "hello world" - span = next(item.payload for item in items if item.type == "span") assert span["attributes"]["sentry.op"] == "gen_ai.chat" assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" @@ -2065,7 +2048,6 @@ async def test_streaming_chat_completion_async( response_string += x.choices[0].delta.content assert response_string == "hello world" - tx = events[0] assert tx["type"] == "transaction" span = tx["spans"][0] @@ -2694,7 +2676,6 @@ async def test_embeddings_create_async( ) param_id = request.node.callspec.id - if ( "string" in param_id and "string_sequence" not in param_id @@ -2745,7 +2726,6 @@ async def test_embeddings_create_async( assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "text-embedding-3-large" param_id = request.node.callspec.id - if ( "string" in param_id and "string_sequence" not in param_id @@ -4975,6 +4955,162 @@ async def test_ai_client_span_streaming_responses_async_api( "thread.id": mock.ANY, "thread.name": mock.ANY, } + + param_id = request.node.callspec.id + if "string" in param_id and ( + instructions is None or isinstance(instructions, Omit) + ): # type: ignore + expected_data.update( + { + "gen_ai.request.messages": safe_serialize( + ["How do I check if a Python object is an instance of a class?"] + ), + } + ) + elif "string" in param_id: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + } + ] + ), + "gen_ai.request.messages": safe_serialize( + ["How do I check if a Python object is an instance of a class?"] + ), + } + ) + elif "blocks_no_type" in param_id and ( + instructions is None or isinstance(instructions, Omit) + ): # type: ignore + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [{"type": "text", 
"content": "You are a helpful assistant."}] + ), + "gen_ai.request.messages": safe_serialize( + [{"role": "user", "content": "hello"}] + ), + } + ) + elif "blocks_no_type" in param_id: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"role": "user", "content": "hello"}] + ), + } + ) + elif "blocks" in param_id and ( + instructions is None or isinstance(instructions, Omit) + ): # type: ignore + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [{"type": "text", "content": "You are a helpful assistant."}] + ), + "gen_ai.request.messages": safe_serialize( + [{"type": "message", "role": "user", "content": "hello"}] + ), + } + ) + elif "blocks" in param_id: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"type": "message", "role": "user", "content": "hello"}] + ), + } + ) + elif "parts_no_type" in param_id and ( + instructions is None or isinstance(instructions, Omit) + ): # type: ignore + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"role": "user", "content": "hello"}] + ), + } + ) + elif "parts_no_type" in param_id: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful 
assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"role": "user", "content": "hello"}] + ), + } + ) + elif instructions is None or isinstance(instructions, Omit): # type: ignore + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"type": "message", "role": "user", "content": "hello"}] + ), + } + ) + else: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"type": "message", "role": "user", "content": "hello"}] + ), + } + ) + + assert spans[0]["attributes"] == expected_data else: events = capture_events() @@ -5023,163 +5159,160 @@ async def test_ai_client_span_streaming_responses_async_api( "thread.name": mock.ANY, } - param_id = request.node.callspec.id - if "string" in param_id and ( - instructions is None or isinstance(instructions, Omit) - ): # type: ignore - expected_data.update( - { - "gen_ai.request.messages": safe_serialize( - ["How do I check if a Python object is an instance of a class?"] - ), - } - ) - elif "string" in param_id: - expected_data.update( - { - "gen_ai.system_instructions": safe_serialize( - [ - { - "type": "text", - "content": "You are a coding assistant that talks like a pirate.", - } - ] - ), - "gen_ai.request.messages": safe_serialize( - ["How do I check if a Python object is an instance of a class?"] - ), - } - ) - elif "blocks_no_type" in param_id and ( - instructions is None or isinstance(instructions, Omit) - ): # type: ignore - expected_data.update( - { - 
"gen_ai.system_instructions": safe_serialize( - [{"type": "text", "content": "You are a helpful assistant."}] - ), - "gen_ai.request.messages": safe_serialize( - [{"role": "user", "content": "hello"}] - ), - } - ) - elif "blocks_no_type" in param_id: - expected_data.update( - { - "gen_ai.system_instructions": safe_serialize( - [ - { - "type": "text", - "content": "You are a coding assistant that talks like a pirate.", - }, - {"type": "text", "content": "You are a helpful assistant."}, - ] - ), - "gen_ai.request.messages": safe_serialize( - [{"role": "user", "content": "hello"}] - ), - } - ) - elif "blocks" in param_id and ( - instructions is None or isinstance(instructions, Omit) - ): # type: ignore - expected_data.update( - { - "gen_ai.system_instructions": safe_serialize( - [{"type": "text", "content": "You are a helpful assistant."}] - ), - "gen_ai.request.messages": safe_serialize( - [{"type": "message", "role": "user", "content": "hello"}] - ), - } - ) - elif "blocks" in param_id: - expected_data.update( - { - "gen_ai.system_instructions": safe_serialize( - [ - { - "type": "text", - "content": "You are a coding assistant that talks like a pirate.", - }, - {"type": "text", "content": "You are a helpful assistant."}, - ] - ), - "gen_ai.request.messages": safe_serialize( - [{"type": "message", "role": "user", "content": "hello"}] - ), - } - ) - elif "parts_no_type" in param_id and ( - instructions is None or isinstance(instructions, Omit) - ): # type: ignore - expected_data.update( - { - "gen_ai.system_instructions": safe_serialize( - [ - {"type": "text", "content": "You are a helpful assistant."}, - {"type": "text", "content": "Be concise and clear."}, - ] - ), - "gen_ai.request.messages": safe_serialize( - [{"role": "user", "content": "hello"}] - ), - } - ) - elif "parts_no_type" in param_id: - expected_data.update( - { - "gen_ai.system_instructions": safe_serialize( - [ - { - "type": "text", - "content": "You are a coding assistant that talks like a pirate.", 
- }, - {"type": "text", "content": "You are a helpful assistant."}, - {"type": "text", "content": "Be concise and clear."}, - ] - ), - "gen_ai.request.messages": safe_serialize( - [{"role": "user", "content": "hello"}] - ), - } - ) - elif instructions is None or isinstance(instructions, Omit): # type: ignore - expected_data.update( - { - "gen_ai.system_instructions": safe_serialize( - [ - {"type": "text", "content": "You are a helpful assistant."}, - {"type": "text", "content": "Be concise and clear."}, - ] - ), - "gen_ai.request.messages": safe_serialize( - [{"type": "message", "role": "user", "content": "hello"}] - ), - } - ) - else: - expected_data.update( - { - "gen_ai.system_instructions": safe_serialize( - [ - { - "type": "text", - "content": "You are a coding assistant that talks like a pirate.", - }, - {"type": "text", "content": "You are a helpful assistant."}, - {"type": "text", "content": "Be concise and clear."}, - ] - ), - "gen_ai.request.messages": safe_serialize( - [{"type": "message", "role": "user", "content": "hello"}] - ), - } - ) + param_id = request.node.callspec.id + if "string" in param_id and ( + instructions is None or isinstance(instructions, Omit) + ): # type: ignore + expected_data.update( + { + "gen_ai.request.messages": safe_serialize( + ["How do I check if a Python object is an instance of a class?"] + ), + } + ) + elif "string" in param_id: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + } + ] + ), + "gen_ai.request.messages": safe_serialize( + ["How do I check if a Python object is an instance of a class?"] + ), + } + ) + elif "blocks_no_type" in param_id and ( + instructions is None or isinstance(instructions, Omit) + ): # type: ignore + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [{"type": "text", "content": "You are a helpful assistant."}] + ), + "gen_ai.request.messages": 
safe_serialize( + [{"role": "user", "content": "hello"}] + ), + } + ) + elif "blocks_no_type" in param_id: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"role": "user", "content": "hello"}] + ), + } + ) + elif "blocks" in param_id and ( + instructions is None or isinstance(instructions, Omit) + ): # type: ignore + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [{"type": "text", "content": "You are a helpful assistant."}] + ), + "gen_ai.request.messages": safe_serialize( + [{"type": "message", "role": "user", "content": "hello"}] + ), + } + ) + elif "blocks" in param_id: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"type": "message", "role": "user", "content": "hello"}] + ), + } + ) + elif "parts_no_type" in param_id and ( + instructions is None or isinstance(instructions, Omit) + ): # type: ignore + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"role": "user", "content": "hello"}] + ), + } + ) + elif "parts_no_type" in param_id: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ), + 
"gen_ai.request.messages": safe_serialize( + [{"role": "user", "content": "hello"}] + ), + } + ) + elif instructions is None or isinstance(instructions, Omit): # type: ignore + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"type": "message", "role": "user", "content": "hello"}] + ), + } + ) + else: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"type": "message", "role": "user", "content": "hello"}] + ), + } + ) - if stream_gen_ai_spans: - assert spans[0]["attributes"] == expected_data - else: assert spans[0]["data"] == expected_data @@ -5774,7 +5907,6 @@ def test_openai_message_truncation( assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] messages_data = span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert isinstance(messages_data, str) parsed_messages = json.loads(messages_data) @@ -5796,7 +5928,6 @@ def test_openai_message_truncation( assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] messages_data = span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert isinstance(messages_data, str) parsed_messages = json.loads(messages_data) diff --git a/tests/integrations/pydantic_ai/test_pydantic_ai.py b/tests/integrations/pydantic_ai/test_pydantic_ai.py index 5cea5063ff..42a666644e 100644 --- a/tests/integrations/pydantic_ai/test_pydantic_ai.py +++ b/tests/integrations/pydantic_ai/test_pydantic_ai.py @@ -95,7 +95,6 @@ async def test_agent_run_async( chat_spans = [ s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" ] - 
assert len(chat_spans) >= 1 # Check chat span @@ -126,7 +125,6 @@ async def test_agent_run_async( # Find child span types (invoke_agent is the transaction, not a child span) chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] - assert len(chat_spans) >= 1 # Check chat span @@ -282,8 +280,6 @@ def test_agent_run_sync( assert result is not None assert result.output is not None - spans = [item.payload for item in items if item.type == "span"] - # Verify transaction (transaction,) = (item.payload for item in items if item.type == "transaction") @@ -292,10 +288,10 @@ def test_agent_run_sync( assert transaction["contexts"]["trace"]["origin"] == "auto.ai.pydantic_ai" # Find span types + spans = [item.payload for item in items if item.type == "span"] chat_spans = [ s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" ] - assert len(chat_spans) >= 1 # Verify streaming flag is False for sync @@ -318,7 +314,6 @@ def test_agent_run_sync( # Find span types chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] - assert len(chat_spans) >= 1 # Verify streaming flag is False for sync @@ -404,8 +399,6 @@ async def test_agent_run_stream( async for _ in result.stream_output(): pass - spans = [item.payload for item in items if item.type == "span"] - # Verify transaction (transaction,) = (item.payload for item in items if item.type == "transaction") @@ -414,10 +407,10 @@ async def test_agent_run_stream( assert transaction["contexts"]["trace"]["origin"] == "auto.ai.pydantic_ai" # Find chat spans + spans = [item.payload for item in items if item.type == "span"] chat_spans = [ s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" ] - assert len(chat_spans) >= 1 # Verify streaming flag is True for streaming @@ -447,7 +440,6 @@ async def test_agent_run_stream( # Find chat spans chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] - assert len(chat_spans) >= 1 # Verify streaming flag is True for streaming @@ -501,7 +493,6 @@ async 
def test_agent_run_stream_events( chat_spans = [ s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" ] - assert len(chat_spans) >= 1 # run_stream_events uses run() internally, so streaming should be False @@ -521,7 +512,6 @@ async def test_agent_run_stream_events( # Find chat spans spans = transaction["spans"] chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] - assert len(chat_spans) >= 1 # run_stream_events uses run() internally, so streaming should be False @@ -1082,7 +1072,6 @@ async def test_system_prompt_attribute( chat_spans = [ s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" ] - assert len(chat_spans) >= 1 chat_span = chat_spans[0] @@ -1109,7 +1098,6 @@ async def test_system_prompt_attribute( # The transaction IS the invoke_agent span, check for messages in chat spans instead chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] - assert len(chat_spans) >= 1 chat_span = chat_spans[0] @@ -1332,7 +1320,6 @@ async def run_agent(input_text): results = await asyncio.gather(*[run_agent(f"Input {i}") for i in range(3)]) assert len(results) == 3 - assert len(events) == 3 # Verify each transaction is separate @@ -1393,7 +1380,6 @@ async def test_message_history( # Check the second transaction has the full history second_transaction = events[1] spans = second_transaction["spans"] - chat_spans = [ s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" ] @@ -1418,7 +1404,6 @@ async def test_message_history( # Check the second transaction has the full history second_transaction = events[1] spans = second_transaction["spans"] - chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] if chat_spans: @@ -2160,7 +2145,6 @@ async def test_invoke_agent_with_instructions( chat_spans = [ s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" ] - assert len(chat_spans) >= 1 chat_span = chat_spans[0] @@ -2186,7 +2170,6 @@ async def test_invoke_agent_with_instructions( # The 
transaction IS the invoke_agent span, check for messages in chat spans instead chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] - assert len(chat_spans) >= 1 chat_span = chat_spans[0] @@ -2378,7 +2361,7 @@ async def test_agent_data_from_scope( # Verify agent name is capture (transaction,) = events - # Verify agent name is captured + # Verify agent name is captured assert transaction["transaction"] == "invoke_agent test_scope_agent" @@ -3917,7 +3900,6 @@ async def test_binary_content_in_agent_run( chat_spans = [ s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" ] - assert len(chat_spans) >= 1 chat_span = chat_spans[0] @@ -3934,7 +3916,6 @@ async def test_binary_content_in_agent_run( (transaction,) = events chat_spans = [s for s in transaction["spans"] if s["op"] == "gen_ai.chat"] - assert len(chat_spans) >= 1 chat_span = chat_spans[0] From dde7bf4d4ae1d0413baf0fc2680069e6facf7884 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Wed, 13 May 2026 10:10:58 +0200 Subject: [PATCH 76/84] restore type annotations in huggingface_hub tests --- .../huggingface_hub/test_huggingface_hub.py | 120 +++++++++--------- 1 file changed, 60 insertions(+), 60 deletions(-) diff --git a/tests/integrations/huggingface_hub/test_huggingface_hub.py b/tests/integrations/huggingface_hub/test_huggingface_hub.py index 5417cec250..4772eb368f 100644 --- a/tests/integrations/huggingface_hub/test_huggingface_hub.py +++ b/tests/integrations/huggingface_hub/test_huggingface_hub.py @@ -17,7 +17,7 @@ if TYPE_CHECKING: - pass + from typing import Any HF_VERSION = package_version("huggingface-hub") @@ -471,14 +471,14 @@ def mock_hf_chat_completion_api_streaming_tools(httpx_mock): @pytest.mark.parametrize("send_default_pii", [True, False]) @pytest.mark.parametrize("include_prompts", [True, False]) def test_text_generation( - sentry_init, - capture_events, - capture_items, - send_default_pii, - include_prompts, - mock_hf_text_generation_api, - 
stream_gen_ai_spans, -): + sentry_init: "Any", + capture_events: "Any", + capture_items: "Any", + send_default_pii: "Any", + include_prompts: "Any", + mock_hf_text_generation_api: "Any", + stream_gen_ai_spans: "Any", +) -> None: sentry_init( traces_sample_rate=1.0, send_default_pii=send_default_pii, @@ -606,14 +606,14 @@ def test_text_generation( @pytest.mark.parametrize("send_default_pii", [True, False]) @pytest.mark.parametrize("include_prompts", [True, False]) def test_text_generation_streaming( - sentry_init, - capture_events, - capture_items, - send_default_pii, - include_prompts, - mock_hf_text_generation_api_streaming, - stream_gen_ai_spans, -): + sentry_init: "Any", + capture_events: "Any", + capture_items: "Any", + send_default_pii: "Any", + include_prompts: "Any", + mock_hf_text_generation_api_streaming: "Any", + stream_gen_ai_spans: "Any", +) -> None: sentry_init( traces_sample_rate=1.0, send_default_pii=send_default_pii, @@ -739,14 +739,14 @@ def test_text_generation_streaming( @pytest.mark.parametrize("send_default_pii", [True, False]) @pytest.mark.parametrize("include_prompts", [True, False]) def test_chat_completion( - sentry_init, - capture_events, - capture_items, - send_default_pii, - include_prompts, - mock_hf_chat_completion_api, - stream_gen_ai_spans, -): + sentry_init: "Any", + capture_events: "Any", + capture_items: "Any", + send_default_pii: "Any", + include_prompts: "Any", + mock_hf_chat_completion_api: "Any", + stream_gen_ai_spans: "Any", +) -> None: sentry_init( traces_sample_rate=1.0, send_default_pii=send_default_pii, @@ -876,14 +876,14 @@ def test_chat_completion( @pytest.mark.parametrize("send_default_pii", [True, False]) @pytest.mark.parametrize("include_prompts", [True, False]) def test_chat_completion_streaming( - sentry_init, - capture_events, - capture_items, - send_default_pii, - include_prompts, - mock_hf_chat_completion_api_streaming, - stream_gen_ai_spans, -): + sentry_init: "Any", + capture_events: "Any", + capture_items: 
"Any", + send_default_pii: "Any", + include_prompts: "Any", + mock_hf_chat_completion_api_streaming: "Any", + stream_gen_ai_spans: "Any", +) -> None: sentry_init( traces_sample_rate=1.0, send_default_pii=send_default_pii, @@ -1015,12 +1015,12 @@ def test_chat_completion_streaming( @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.httpx_mock(assert_all_requests_were_expected=False) def test_chat_completion_api_error( - sentry_init, - capture_events, - capture_items, - mock_hf_api_with_errors, - stream_gen_ai_spans, -): + sentry_init: "Any", + capture_events: "Any", + capture_items: "Any", + mock_hf_api_with_errors: "Any", + stream_gen_ai_spans: "Any", +) -> None: sentry_init( traces_sample_rate=1.0, stream_gen_ai_spans=stream_gen_ai_spans, @@ -1129,12 +1129,12 @@ def test_chat_completion_api_error( @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.httpx_mock(assert_all_requests_were_expected=False) def test_span_status_error( - sentry_init, - capture_events, - capture_items, - mock_hf_api_with_errors, - stream_gen_ai_spans, -): + sentry_init: "Any", + capture_events: "Any", + capture_items: "Any", + mock_hf_api_with_errors: "Any", + stream_gen_ai_spans: "Any", +) -> None: client = get_hf_provider_inference_client() sentry_init( @@ -1197,13 +1197,13 @@ def test_span_status_error( @pytest.mark.parametrize("send_default_pii", [True, False]) @pytest.mark.parametrize("include_prompts", [True, False]) def test_chat_completion_with_tools( - sentry_init, - capture_events, - capture_items, - send_default_pii, - include_prompts, - mock_hf_chat_completion_api_tools, - stream_gen_ai_spans, + sentry_init: "Any", + capture_events: "Any", + capture_items: "Any", + send_default_pii: "Any", + include_prompts: "Any", + mock_hf_chat_completion_api_tools: "Any", + stream_gen_ai_spans: "Any", ): sentry_init( traces_sample_rate=1.0, @@ -1353,14 +1353,14 @@ def test_chat_completion_with_tools( @pytest.mark.parametrize("send_default_pii", 
[True, False]) @pytest.mark.parametrize("include_prompts", [True, False]) def test_chat_completion_streaming_with_tools( - sentry_init, - capture_events, - capture_items, - send_default_pii, - include_prompts, - mock_hf_chat_completion_api_streaming_tools, - stream_gen_ai_spans, -): + sentry_init: "Any", + capture_events: "Any", + capture_items: "Any", + send_default_pii: "Any", + include_prompts: "Any", + mock_hf_chat_completion_api_streaming_tools: "Any", + stream_gen_ai_spans: "Any", +) -> None: sentry_init( traces_sample_rate=1.0, send_default_pii=send_default_pii, From 913ec9af4eefb8296bba602e65ade30be9efa9b1 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Wed, 13 May 2026 10:29:43 +0200 Subject: [PATCH 77/84] litellm test --- tests/integrations/litellm/test_litellm.py | 103 ++++++--------------- 1 file changed, 29 insertions(+), 74 deletions(-) diff --git a/tests/integrations/litellm/test_litellm.py b/tests/integrations/litellm/test_litellm.py index c04619d838..b463387daf 100644 --- a/tests/integrations/litellm/test_litellm.py +++ b/tests/integrations/litellm/test_litellm.py @@ -2325,20 +2325,14 @@ def test_integration_setup(sentry_init): assert _failure_callback in (litellm.failure_callback or []) -@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) -def test_litellm_message_truncation( - sentry_init, - capture_events, - capture_items, - stream_gen_ai_spans, -): +def test_litellm_message_truncation(sentry_init, capture_events): """Test that large messages are truncated properly in LiteLLM integration.""" sentry_init( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - stream_gen_ai_spans=stream_gen_ai_spans, ) + events = capture_events() large_content = ( "This is a very long message that will exceed our size limits. 
" * 1000 @@ -2352,78 +2346,39 @@ def test_litellm_message_truncation( ] mock_response = MockCompletionResponse() - if stream_gen_ai_spans: - items = capture_items("transaction", "span") - - with start_transaction(name="litellm test"): - kwargs = { - "model": "gpt-3.5-turbo", - "messages": messages, - } - - _input_callback(kwargs) - _success_callback( - kwargs, - mock_response, - datetime.now(), - datetime.now(), - ) - - spans = [item.payload for item in items if item.type == "span"] - chat_spans = [ - span - for span in spans - if span["attributes"].get("sentry.op") == OP.GEN_AI_CHAT - ] - assert len(chat_spans) > 0 - - chat_span = chat_spans[0] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["attributes"] - - messages_data = chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert isinstance(messages_data, str) - - parsed_messages = json.loads(messages_data) - assert isinstance(parsed_messages, list) - assert len(parsed_messages) == 1 - assert "small message 5" in str(parsed_messages[0]) - tx = next(item.payload for item in items if item.type == "transaction") - else: - events = capture_events() - - with start_transaction(name="litellm test"): - kwargs = { - "model": "gpt-3.5-turbo", - "messages": messages, - } + with start_transaction(name="litellm test"): + kwargs = { + "model": "gpt-3.5-turbo", + "messages": messages, + } - _input_callback(kwargs) - _success_callback( - kwargs, - mock_response, - datetime.now(), - datetime.now(), - ) + _input_callback(kwargs) + _success_callback( + kwargs, + mock_response, + datetime.now(), + datetime.now(), + ) - assert len(events) > 0 - tx = events[0] - assert tx["type"] == "transaction" + assert len(events) > 0 + tx = events[0] + assert tx["type"] == "transaction" - chat_spans = [ - span for span in tx.get("spans", []) if span.get("op") == OP.GEN_AI_CHAT - ] - assert len(chat_spans) > 0 + chat_spans = [ + span for span in tx.get("spans", []) if span.get("op") == OP.GEN_AI_CHAT + ] + assert len(chat_spans) > 
0 - chat_span = chat_spans[0] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["data"] + chat_span = chat_spans[0] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["data"] - messages_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert isinstance(messages_data, str) + messages_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert isinstance(messages_data, str) - parsed_messages = json.loads(messages_data) - assert isinstance(parsed_messages, list) - assert len(parsed_messages) == 1 - assert "small message 5" in str(parsed_messages[0]) + parsed_messages = json.loads(messages_data) + assert isinstance(parsed_messages, list) + assert len(parsed_messages) == 1 + assert "small message 5" in str(parsed_messages[0]) assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5 From f2bdff5cc6967b30cf08796fbd7eddd92c7a2746 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Wed, 13 May 2026 10:31:11 +0200 Subject: [PATCH 78/84] remove whitespace changes --- tests/integrations/litellm/test_litellm.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/tests/integrations/litellm/test_litellm.py b/tests/integrations/litellm/test_litellm.py index b463387daf..b76980ddd3 100644 --- a/tests/integrations/litellm/test_litellm.py +++ b/tests/integrations/litellm/test_litellm.py @@ -2920,7 +2920,6 @@ def test_binary_content_encoding_uri_type( ) assert len(chat_spans) == 1 span = chat_spans[0] - messages_data = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) else: events = capture_events() @@ -2948,7 +2947,6 @@ def test_binary_content_encoding_uri_type( assert len(chat_spans) == 1 span = chat_spans[0] - messages_data = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) uri_item = next( @@ -3036,10 +3034,8 @@ async def test_async_binary_content_encoding_uri_type( if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) - assert 
len(chat_spans) == 1 span = chat_spans[0] - messages_data = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) else: events = capture_events() @@ -3065,10 +3061,8 @@ async def test_async_binary_content_encoding_uri_type( for x in event["spans"] if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" ) - assert len(chat_spans) == 1 span = chat_spans[0] - messages_data = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) uri_item = next( From ec26b90f87b22ad048a0024f144143fa5b4cf385 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Wed, 13 May 2026 10:31:46 +0200 Subject: [PATCH 79/84] one more whitespace removal --- tests/integrations/litellm/test_litellm.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/integrations/litellm/test_litellm.py b/tests/integrations/litellm/test_litellm.py index b76980ddd3..aab289b28f 100644 --- a/tests/integrations/litellm/test_litellm.py +++ b/tests/integrations/litellm/test_litellm.py @@ -2944,7 +2944,6 @@ def test_binary_content_encoding_uri_type( for x in event["spans"] if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" ) - assert len(chat_spans) == 1 span = chat_spans[0] messages_data = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) From 4ec3ff7e96f6ea4c2c2764b6a0d91eed4b497d08 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Wed, 13 May 2026 10:48:43 +0200 Subject: [PATCH 80/84] remove truncation per integration instead --- sentry_sdk/ai/utils.py | 8 ---- sentry_sdk/integrations/anthropic.py | 8 +++- sentry_sdk/integrations/google_genai/utils.py | 7 ++- sentry_sdk/integrations/langchain.py | 38 +++++++++++---- sentry_sdk/integrations/langgraph.py | 20 ++++++-- sentry_sdk/integrations/litellm.py | 14 ++++-- sentry_sdk/integrations/openai.py | 46 +++++++++++++++---- .../openai_agents/spans/invoke_agent.py | 7 ++- .../integrations/openai_agents/utils.py | 7 ++- .../pydantic_ai/spans/ai_client.py | 7 ++- .../pydantic_ai/spans/invoke_agent.py | 7 
++- 11 files changed, 127 insertions(+), 42 deletions(-) diff --git a/sentry_sdk/ai/utils.py b/sentry_sdk/ai/utils.py index fb9edcd335..8efa077ce5 100644 --- a/sentry_sdk/ai/utils.py +++ b/sentry_sdk/ai/utils.py @@ -741,10 +741,6 @@ def truncate_and_annotate_messages( scope: "Any", max_single_message_chars: int = MAX_SINGLE_MESSAGE_CONTENT_CHARS, ) -> "Optional[List[Dict[str, Any]]]": - client = sentry_sdk.get_client() - if client.options.get("stream_gen_ai_spans", False): - return messages - if not messages: return None @@ -765,10 +761,6 @@ def truncate_and_annotate_embedding_inputs( scope: "Any", max_bytes: int = MAX_GEN_AI_MESSAGE_BYTES, ) -> "Optional[List[Dict[str, Any]]]": - client = sentry_sdk.get_client() - if client.options.get("stream_gen_ai_spans", False): - return messages - if not messages: return None diff --git a/sentry_sdk/integrations/anthropic.py b/sentry_sdk/integrations/anthropic.py index efc2f70ffd..ca9e60e59d 100644 --- a/sentry_sdk/integrations/anthropic.py +++ b/sentry_sdk/integrations/anthropic.py @@ -438,9 +438,13 @@ def _set_common_input_data( normalized_messages.append(transformed_message) role_normalized_messages = normalize_message_roles(normalized_messages) + + client = sentry_sdk.get_client() scope = sentry_sdk.get_current_scope() - messages_data = truncate_and_annotate_messages( - role_normalized_messages, span, scope + messages_data = ( + role_normalized_messages + if client.options.get("stream_gen_ai_spans", False) + else truncate_and_annotate_messages(role_normalized_messages, span, scope) ) if messages_data is not None: set_data_normalized( diff --git a/sentry_sdk/integrations/google_genai/utils.py b/sentry_sdk/integrations/google_genai/utils.py index 25763ebe07..55a5b80233 100644 --- a/sentry_sdk/integrations/google_genai/utils.py +++ b/sentry_sdk/integrations/google_genai/utils.py @@ -892,9 +892,12 @@ def set_span_data_for_request( if messages: normalized_messages = normalize_message_roles(messages) + client = 
sentry_sdk.get_client() scope = sentry_sdk.get_current_scope() - messages_data = truncate_and_annotate_messages( - normalized_messages, span, scope + messages_data = ( + normalized_messages + if client.options.get("stream_gen_ai_spans", False) + else truncate_and_annotate_messages(normalized_messages, span, scope) ) if messages_data is not None: set_data_normalized( diff --git a/sentry_sdk/integrations/langchain.py b/sentry_sdk/integrations/langchain.py index 8acf215bfe..4f5a1b4939 100644 --- a/sentry_sdk/integrations/langchain.py +++ b/sentry_sdk/integrations/langchain.py @@ -374,9 +374,15 @@ def on_llm_start( } for prompt in prompts ] + + client = sentry_sdk.get_client() scope = sentry_sdk.get_current_scope() - messages_data = truncate_and_annotate_messages( - normalized_messages, span, scope + messages_data = ( + normalized_messages + if client.options.get("stream_gen_ai_spans", False) + else truncate_and_annotate_messages( + normalized_messages, span, scope + ) ) if messages_data is not None: set_data_normalized( @@ -463,9 +469,15 @@ def on_chat_model_start( self._normalize_langchain_message(message) ) normalized_messages = normalize_message_roles(normalized_messages) + + client = sentry_sdk.get_client() scope = sentry_sdk.get_current_scope() - messages_data = truncate_and_annotate_messages( - normalized_messages, span, scope + messages_data = ( + normalized_messages + if client.options.get("stream_gen_ai_spans", False) + else truncate_and_annotate_messages( + normalized_messages, span, scope + ) ) if messages_data is not None: set_data_normalized( @@ -992,9 +1004,15 @@ def new_invoke(self: "Any", *args: "Any", **kwargs: "Any") -> "Any": and integration.include_prompts ): normalized_messages = normalize_message_roles([input]) + + client = sentry_sdk.get_client() scope = sentry_sdk.get_current_scope() - messages_data = truncate_and_annotate_messages( - normalized_messages, span, scope + messages_data = ( + normalized_messages + if 
client.options.get("stream_gen_ai_spans", False) + else truncate_and_annotate_messages( + normalized_messages, span, scope + ) ) if messages_data is not None: set_data_normalized( @@ -1049,9 +1067,13 @@ def new_stream(self: "Any", *args: "Any", **kwargs: "Any") -> "Any": and integration.include_prompts ): normalized_messages = normalize_message_roles([input]) + + client = sentry_sdk.get_client() scope = sentry_sdk.get_current_scope() - messages_data = truncate_and_annotate_messages( - normalized_messages, span, scope + messages_data = ( + normalized_messages + if client.options.get("stream_gen_ai_spans", False) + else truncate_and_annotate_messages(normalized_messages, span, scope) ) if messages_data is not None: set_data_normalized( diff --git a/sentry_sdk/integrations/langgraph.py b/sentry_sdk/integrations/langgraph.py index e5ea12b90a..1454d151f4 100644 --- a/sentry_sdk/integrations/langgraph.py +++ b/sentry_sdk/integrations/langgraph.py @@ -181,9 +181,15 @@ def new_invoke(self: "Any", *args: "Any", **kwargs: "Any") -> "Any": input_messages = _parse_langgraph_messages(args[0]) if input_messages: normalized_input_messages = normalize_message_roles(input_messages) + + client = sentry_sdk.get_client() scope = sentry_sdk.get_current_scope() - messages_data = truncate_and_annotate_messages( - normalized_input_messages, span, scope + messages_data = ( + normalized_input_messages + if client.options.get("stream_gen_ai_spans", False) + else truncate_and_annotate_messages( + normalized_input_messages, span, scope + ) ) if messages_data is not None: set_data_normalized( @@ -234,9 +240,15 @@ async def new_ainvoke(self: "Any", *args: "Any", **kwargs: "Any") -> "Any": input_messages = _parse_langgraph_messages(args[0]) if input_messages: normalized_input_messages = normalize_message_roles(input_messages) + + client = sentry_sdk.get_client() scope = sentry_sdk.get_current_scope() - messages_data = truncate_and_annotate_messages( - normalized_input_messages, span, scope + 
messages_data = ( + normalized_input_messages + if client.options.get("stream_gen_ai_spans", False) + else truncate_and_annotate_messages( + normalized_input_messages, span, scope + ) ) if messages_data is not None: set_data_normalized( diff --git a/sentry_sdk/integrations/litellm.py b/sentry_sdk/integrations/litellm.py index 3cff0fbc23..9561bd61f3 100644 --- a/sentry_sdk/integrations/litellm.py +++ b/sentry_sdk/integrations/litellm.py @@ -119,8 +119,11 @@ def _input_callback(kwargs: "Dict[str, Any]") -> None: if isinstance(embedding_input, list) else [embedding_input] ) - messages_data = truncate_and_annotate_embedding_inputs( - input_list, span, scope + client = sentry_sdk.get_client() + messages_data = ( + input_list + if client.options.get("stream_gen_ai_spans", False) + else truncate_and_annotate_embedding_inputs(input_list, span, scope) ) if messages_data is not None: set_data_normalized( @@ -133,9 +136,14 @@ def _input_callback(kwargs: "Dict[str, Any]") -> None: # For chat, look for the 'messages' parameter messages = kwargs.get("messages", []) if messages: + client = sentry_sdk.get_client() scope = sentry_sdk.get_current_scope() messages = _convert_message_parts(messages) - messages_data = truncate_and_annotate_messages(messages, span, scope) + messages_data = ( + messages + if client.options.get("stream_gen_ai_spans", False) + else truncate_and_annotate_messages(messages, span, scope) + ) if messages_data is not None: set_data_normalized( span, diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index b3919d1a9d..7bb328741e 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -398,8 +398,13 @@ def _set_responses_api_input_data( if isinstance(messages, str): normalized_messages = normalize_message_roles([messages]) # type: ignore + client = sentry_sdk.get_client() scope = sentry_sdk.get_current_scope() - messages_data = truncate_and_annotate_messages(normalized_messages, span, scope) + 
messages_data = ( + normalized_messages + if client.options.get("stream_gen_ai_spans", False) + else truncate_and_annotate_messages(normalized_messages, span, scope) + ) if messages_data is not None: set_data_normalized( span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data, unpack=False @@ -413,8 +418,13 @@ def _set_responses_api_input_data( ] if len(non_system_messages) > 0: normalized_messages = normalize_message_roles(non_system_messages) + client = sentry_sdk.get_client() scope = sentry_sdk.get_current_scope() - messages_data = truncate_and_annotate_messages(normalized_messages, span, scope) + messages_data = ( + normalized_messages + if client.options.get("stream_gen_ai_spans", False) + else truncate_and_annotate_messages(normalized_messages, span, scope) + ) if messages_data is not None: set_data_normalized( span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data, unpack=False @@ -472,8 +482,13 @@ def _set_completions_api_input_data( if isinstance(messages, str): normalized_messages = normalize_message_roles([messages]) # type: ignore + client = sentry_sdk.get_client() scope = sentry_sdk.get_current_scope() - messages_data = truncate_and_annotate_messages(normalized_messages, span, scope) + messages_data = ( + normalized_messages + if client.options.get("stream_gen_ai_spans", False) + else truncate_and_annotate_messages(normalized_messages, span, scope) + ) if messages_data is not None: set_data_normalized( span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data, unpack=False @@ -503,8 +518,13 @@ def _set_completions_api_input_data( ] if len(non_system_messages) > 0: normalized_messages = normalize_message_roles(non_system_messages) + client = sentry_sdk.get_client() scope = sentry_sdk.get_current_scope() - messages_data = truncate_and_annotate_messages(normalized_messages, span, scope) + messages_data = ( + normalized_messages + if client.options.get("stream_gen_ai_spans", False) + else truncate_and_annotate_messages(normalized_messages, span, scope) + ) if 
messages_data is not None: set_data_normalized( span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data, unpack=False @@ -539,9 +559,14 @@ def _set_embeddings_input_data( set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, "embeddings") normalized_messages = normalize_message_roles([messages]) # type: ignore + client = sentry_sdk.get_client() scope = sentry_sdk.get_current_scope() - messages_data = truncate_and_annotate_embedding_inputs( - normalized_messages, span, scope + messages_data = ( + normalized_messages + if client.options.get("stream_gen_ai_spans", False) + else truncate_and_annotate_embedding_inputs( + normalized_messages, span, scope + ) ) if messages_data is not None: set_data_normalized( @@ -560,9 +585,14 @@ def _set_embeddings_input_data( if len(messages) > 0: normalized_messages = normalize_message_roles(messages) + client = sentry_sdk.get_client() scope = sentry_sdk.get_current_scope() - messages_data = truncate_and_annotate_embedding_inputs( - normalized_messages, span, scope + messages_data = ( + normalized_messages + if client.options.get("stream_gen_ai_spans", False) + else truncate_and_annotate_embedding_inputs( + normalized_messages, span, scope + ) ) if messages_data is not None: set_data_normalized( diff --git a/sentry_sdk/integrations/openai_agents/spans/invoke_agent.py b/sentry_sdk/integrations/openai_agents/spans/invoke_agent.py index 27f9fdab25..2346189a96 100644 --- a/sentry_sdk/integrations/openai_agents/spans/invoke_agent.py +++ b/sentry_sdk/integrations/openai_agents/spans/invoke_agent.py @@ -63,9 +63,12 @@ def invoke_agent_span( if len(messages) > 0: normalized_messages = normalize_message_roles(messages) + client = sentry_sdk.get_client() scope = sentry_sdk.get_current_scope() - messages_data = truncate_and_annotate_messages( - normalized_messages, span, scope + messages_data = ( + normalized_messages + if client.options.get("stream_gen_ai_spans", False) + else truncate_and_annotate_messages(normalized_messages, span, scope) 
) if messages_data is not None: set_data_normalized( diff --git a/sentry_sdk/integrations/openai_agents/utils.py b/sentry_sdk/integrations/openai_agents/utils.py index ee504b3496..ea1faefde7 100644 --- a/sentry_sdk/integrations/openai_agents/utils.py +++ b/sentry_sdk/integrations/openai_agents/utils.py @@ -173,8 +173,13 @@ def _set_input_data( ) normalized_messages = normalize_message_roles(request_messages) + client = sentry_sdk.get_client() scope = sentry_sdk.get_current_scope() - messages_data = truncate_and_annotate_messages(normalized_messages, span, scope) + messages_data = ( + normalized_messages + if client.options.get("stream_gen_ai_spans", False) + else truncate_and_annotate_messages(normalized_messages, span, scope) + ) if messages_data is not None: set_data_normalized( span, diff --git a/sentry_sdk/integrations/pydantic_ai/spans/ai_client.py b/sentry_sdk/integrations/pydantic_ai/spans/ai_client.py index dc95acad45..e549083fed 100644 --- a/sentry_sdk/integrations/pydantic_ai/spans/ai_client.py +++ b/sentry_sdk/integrations/pydantic_ai/spans/ai_client.py @@ -182,9 +182,12 @@ def _set_input_messages(span: "sentry_sdk.tracing.Span", messages: "Any") -> Non if formatted_messages: normalized_messages = normalize_message_roles(formatted_messages) + client = sentry_sdk.get_client() scope = sentry_sdk.get_current_scope() - messages_data = truncate_and_annotate_messages( - normalized_messages, span, scope + messages_data = ( + normalized_messages + if client.options.get("stream_gen_ai_spans", False) + else truncate_and_annotate_messages(normalized_messages, span, scope) ) set_data_normalized( span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data, unpack=False diff --git a/sentry_sdk/integrations/pydantic_ai/spans/invoke_agent.py b/sentry_sdk/integrations/pydantic_ai/spans/invoke_agent.py index ee08ca7036..c507315dcd 100644 --- a/sentry_sdk/integrations/pydantic_ai/spans/invoke_agent.py +++ b/sentry_sdk/integrations/pydantic_ai/spans/invoke_agent.py @@ -122,9 
+122,12 @@ def invoke_agent_span( if messages: normalized_messages = normalize_message_roles(messages) + client = sentry_sdk.get_client() scope = sentry_sdk.get_current_scope() - messages_data = truncate_and_annotate_messages( - normalized_messages, span, scope + messages_data = ( + normalized_messages + if client.options.get("stream_gen_ai_spans", False) + else truncate_and_annotate_messages(normalized_messages, span, scope) ) set_data_normalized( span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data, unpack=False From 962fd656b084deeb5c465d13dd2234c793fa0995 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Wed, 13 May 2026 13:33:56 +0200 Subject: [PATCH 81/84] update tests with to have more than one input message --- .../integrations/anthropic/test_anthropic.py | 120 ++++-- .../google_genai/test_google_genai.py | 50 ++- .../huggingface_hub/test_huggingface_hub.py | 37 +- .../integrations/langchain/test_langchain.py | 52 ++- tests/integrations/litellm/test_litellm.py | 36 +- tests/integrations/openai/test_openai.py | 366 +++++++++++++++--- .../openai_agents/test_openai_agents.py | 274 +++++++++++++ .../pydantic_ai/test_pydantic_ai.py | 52 ++- 8 files changed, 891 insertions(+), 96 deletions(-) diff --git a/tests/integrations/anthropic/test_anthropic.py b/tests/integrations/anthropic/test_anthropic.py index 4255a0e6fc..d6b2c269d9 100644 --- a/tests/integrations/anthropic/test_anthropic.py +++ b/tests/integrations/anthropic/test_anthropic.py @@ -110,10 +110,14 @@ def test_nonstreaming_create_message( client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) messages = [ + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, { "role": "user", "content": "Hello, Claude", - } + }, ] if stream_gen_ai_spans: @@ -144,10 +148,16 @@ def test_nonstreaming_create_message( assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" if send_default_pii and include_prompts: - assert ( - 
span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - == '[{"role": "user", "content": "Hello, Claude"}]' - ) + assert json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) == [ + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, + { + "role": "user", + "content": "Hello, Claude", + }, + ] assert ( span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi, I'm Claude." ) @@ -245,10 +255,14 @@ async def test_nonstreaming_create_message_async( client.messages._post = AsyncMock(return_value=EXAMPLE_MESSAGE) messages = [ + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, { "role": "user", "content": "Hello, Claude", - } + }, ] if stream_gen_ai_spans: @@ -279,10 +293,16 @@ async def test_nonstreaming_create_message_async( assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" if send_default_pii and include_prompts: - assert ( - span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - == '[{"role": "user", "content": "Hello, Claude"}]' - ) + assert json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) == [ + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, + { + "role": "user", + "content": "Hello, Claude", + }, + ] assert ( span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi, I'm Claude." 
) @@ -413,10 +433,14 @@ def test_streaming_create_message( ) messages = [ + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, { "role": "user", "content": "Hello, Claude", - } + }, ] if stream_gen_ai_spans: @@ -449,10 +473,16 @@ def test_streaming_create_message( assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" if send_default_pii and include_prompts: - assert ( - span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - == '[{"role": "user", "content": "Hello, Claude"}]' - ) + assert json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) == [ + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, + { + "role": "user", + "content": "Hello, Claude", + }, + ] assert ( span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" ) @@ -895,10 +925,14 @@ def test_stream_messages( ) messages = [ + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, { "role": "user", "content": "Hello, Claude", - } + }, ] if stream_gen_ai_spans: @@ -931,10 +965,16 @@ def test_stream_messages( assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" if send_default_pii and include_prompts: - assert ( - span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - == '[{"role": "user", "content": "Hello, Claude"}]' - ) + assert json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) == [ + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, + { + "role": "user", + "content": "Hello, Claude", + }, + ] assert ( span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" 
) @@ -1390,10 +1430,14 @@ async def test_streaming_create_message_async( ) messages = [ + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, { "role": "user", "content": "Hello, Claude", - } + }, ] if stream_gen_ai_spans: @@ -1425,10 +1469,16 @@ async def test_streaming_create_message_async( assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" if send_default_pii and include_prompts: - assert ( - span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - == '[{"role": "user", "content": "Hello, Claude"}]' - ) + assert json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) == [ + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, + { + "role": "user", + "content": "Hello, Claude", + }, + ] assert ( span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" ) @@ -1883,10 +1933,14 @@ async def test_stream_message_async( ) messages = [ + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, { "role": "user", "content": "Hello, Claude", - } + }, ] if stream_gen_ai_spans: @@ -1919,10 +1973,16 @@ async def test_stream_message_async( assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" if send_default_pii and include_prompts: - assert ( - span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - == '[{"role": "user", "content": "Hello, Claude"}]' - ) + assert json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) == [ + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, + { + "role": "user", + "content": "Hello, Claude", + }, + ] assert ( span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" 
) diff --git a/tests/integrations/google_genai/test_google_genai.py b/tests/integrations/google_genai/test_google_genai.py index ff0b59178b..723a71959d 100644 --- a/tests/integrations/google_genai/test_google_genai.py +++ b/tests/integrations/google_genai/test_google_genai.py @@ -153,7 +153,12 @@ def test_nonstreaming_generate_content( ), start_transaction(name="google_genai"): config = create_test_config(temperature=0.7, max_output_tokens=100) mock_genai_client.models.generate_content( - model="gemini-1.5-flash", contents="Tell me a joke", config=config + model="gemini-1.5-flash", + contents=[ + "Message demonstrating the absence of truncation.", + "Tell me a joke", + ], + config=config, ) (event,) = (item.payload for item in items if item.type == "transaction") @@ -173,6 +178,24 @@ def test_nonstreaming_generate_content( ) if send_default_pii and include_prompts: + assert json.loads( + chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) == [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Message demonstrating the absence of truncation.", + }, + { + "type": "text", + "text": "Tell me a joke", + }, + ], + } + ] + # Response text is stored as a JSON array response_text = chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] @@ -675,7 +698,12 @@ def test_streaming_generate_content( ), start_transaction(name="google_genai"): config = create_test_config() stream = mock_genai_client.models.generate_content_stream( - model="gemini-1.5-flash", contents="Stream me a response", config=config + model="gemini-1.5-flash", + contents=[ + "Message demonstrating the absence of truncation.", + "Stream me a response", + ], + config=config, ) # Consume the stream (this is what users do with the integration wrapper) @@ -693,6 +721,24 @@ def test_streaming_generate_content( assert len(spans) == 1 chat_span = next(item.payload for item in items if item.type == "span") + assert json.loads( + chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) 
== [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Message demonstrating the absence of truncation.", + }, + { + "type": "text", + "text": "Stream me a response", + }, + ], + } + ] + # Check that streaming flag is set on both spans assert chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True diff --git a/tests/integrations/huggingface_hub/test_huggingface_hub.py b/tests/integrations/huggingface_hub/test_huggingface_hub.py index 4772eb368f..85ad55a47c 100644 --- a/tests/integrations/huggingface_hub/test_huggingface_hub.py +++ b/tests/integrations/huggingface_hub/test_huggingface_hub.py @@ -2,6 +2,7 @@ from typing import TYPE_CHECKING from unittest import mock +from sentry_sdk.utils import safe_serialize import pytest import responses from huggingface_hub import InferenceClient @@ -761,7 +762,13 @@ def test_chat_completion( with sentry_sdk.start_transaction(name="test"): client.chat_completion( - messages=[{"role": "user", "content": "Hello!"}], + messages=[ + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, + {"role": "user", "content": "Hello!"}, + ], stream=False, ) @@ -804,8 +811,14 @@ def test_chat_completion( } if send_default_pii and include_prompts: - expected_data["gen_ai.request.messages"] = ( - '[{"role": "user", "content": "Hello!"}]' + expected_data["gen_ai.request.messages"] = safe_serialize( + [ + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, + {"role": "user", "content": "Hello!"}, + ] ) expected_data["gen_ai.response.text"] = ( "[mocked] Hello! How can I help you today?" 
@@ -899,7 +912,13 @@ def test_chat_completion_streaming( with sentry_sdk.start_transaction(name="test"): _ = list( client.chat_completion( - [{"role": "user", "content": "Hello!"}], + [ + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, + {"role": "user", "content": "Hello!"}, + ], stream=True, ) ) @@ -945,8 +964,14 @@ def test_chat_completion_streaming( expected_data["gen_ai.usage.total_tokens"] = 197 if send_default_pii and include_prompts: - expected_data["gen_ai.request.messages"] = ( - '[{"role": "user", "content": "Hello!"}]' + expected_data["gen_ai.request.messages"] = safe_serialize( + [ + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, + {"role": "user", "content": "Hello!"}, + ] ) expected_data["gen_ai.response.text"] = "the mocked model response" diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index 2c0f8af977..3a2ef76a5a 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -577,6 +577,9 @@ def test_langchain_create_agent( agent.invoke( { "messages": [ + HumanMessage( + content="Message demonstrating the absence of truncation." + ), HumanMessage(content="How many letters in the word eudca"), ], }, @@ -606,6 +609,19 @@ def test_langchain_create_agent( == "Hello, how can I help you?" 
) + assert json.loads( + chat_spans[0]["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) == [ + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, + { + "role": "user", + "content": "How many letters in the word eudca", + }, + ] + param_id = request.node.callspec.id if "string" in param_id: assert [ @@ -1343,7 +1359,16 @@ def test_langchain_openai_tools_agent( "send", side_effect=[tool_response, final_response], ) as _, start_transaction(): - list(agent_executor.stream({"input": "How many letters in the word eudca"})) + list( + agent_executor.stream( + { + "input": [ + "Message demonstrating the absence of truncation.", + "How many letters in the word eudca", + ] + } + ) + ) tx = next(item.payload for item in items if item.type == "transaction") assert tx["type"] == "transaction" @@ -1389,6 +1414,15 @@ def test_langchain_openai_tools_agent( assert "word" in tool_exec_span["attributes"][SPANDATA.GEN_AI_TOOL_INPUT] assert 5 == int(tool_exec_span["attributes"][SPANDATA.GEN_AI_TOOL_OUTPUT]) + assert json.loads( + chat_spans[0]["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) == [ + { + "role": "user", + "content": "['Message demonstrating the absence of truncation.', 'How many letters in the word eudca']", + } + ] + param_id = request.node.callspec.id if "string" in param_id: assert [ @@ -2011,7 +2045,12 @@ def test_langchain_openai_tools_agent_stream( ) as _, start_transaction(): list( agent_executor.stream( - {"input": "How many letters in the word eudca"}, + { + "input": [ + "Message demonstrating the absence of truncation.", + "How many letters in the word eudca", + ] + }, {"run_name": "my-snazzy-pipeline"}, ) ) @@ -2065,6 +2104,15 @@ def test_langchain_openai_tools_agent_stream( assert "word" in tool_exec_span["attributes"][SPANDATA.GEN_AI_TOOL_INPUT] assert 5 == int(tool_exec_span["attributes"][SPANDATA.GEN_AI_TOOL_OUTPUT]) + assert json.loads( + chat_spans[0]["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) == [ + { 
+ "role": "user", + "content": "['Message demonstrating the absence of truncation.', 'How many letters in the word eudca']", + } + ] + param_id = request.node.callspec.id if "string" in param_id: assert [ diff --git a/tests/integrations/litellm/test_litellm.py b/tests/integrations/litellm/test_litellm.py index aab289b28f..703ae67b1a 100644 --- a/tests/integrations/litellm/test_litellm.py +++ b/tests/integrations/litellm/test_litellm.py @@ -159,7 +159,10 @@ def test_nonstreaming_chat_completion( stream_gen_ai_spans=stream_gen_ai_spans, ) - messages = [{"role": "user", "content": "Hello!"}] + messages = [ + {"role": "user", "content": "Message demonstrating the absence of truncation."}, + {"role": "user", "content": "Hello!"}, + ] client = OpenAI(api_key="test-key") @@ -216,7 +219,13 @@ def test_nonstreaming_chat_completion( assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" if send_default_pii and include_prompts: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] + assert json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) == [ + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, + {"role": "user", "content": "Hello!"}, + ] assert SPANDATA.GEN_AI_RESPONSE_TEXT in span["attributes"] else: assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] @@ -302,7 +311,10 @@ async def test_async_nonstreaming_chat_completion( stream_gen_ai_spans=stream_gen_ai_spans, ) - messages = [{"role": "user", "content": "Hello!"}] + messages = [ + {"role": "user", "content": "Message demonstrating the absence of truncation."}, + {"role": "user", "content": "Hello!"}, + ] client = AsyncOpenAI(api_key="test-key") @@ -360,7 +372,13 @@ async def test_async_nonstreaming_chat_completion( assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" if send_default_pii and include_prompts: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] + assert 
json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) == [ + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, + {"role": "user", "content": "Hello!"}, + ] assert SPANDATA.GEN_AI_RESPONSE_TEXT in span["attributes"] else: assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] @@ -448,7 +466,10 @@ def test_streaming_chat_completion( stream_gen_ai_spans=stream_gen_ai_spans, ) - messages = [{"role": "user", "content": "Hello!"}] + messages = [ + {"role": "user", "content": "Message demonstrating the absence of truncation."}, + {"role": "user", "content": "Hello!"}, + ] client = OpenAI(api_key="test-key") @@ -556,7 +577,10 @@ async def test_async_streaming_chat_completion( stream_gen_ai_spans=stream_gen_ai_spans, ) - messages = [{"role": "user", "content": "Hello!"}] + messages = [ + {"role": "user", "content": "Message demonstrating the absence of truncation."}, + {"role": "user", "content": "Hello!"}, + ] client = AsyncOpenAI(api_key="test-key") diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index c9e734da69..5bc9e35b22 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -239,6 +239,10 @@ def test_nonstreaming_chat_completion_no_prompts( "role": "system", "content": "You are a helpful assistant.", }, + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, {"role": "user", "content": "hello"}, ], id="blocks", @@ -252,6 +256,10 @@ def test_nonstreaming_chat_completion_no_prompts( {"type": "text", "text": "Be concise and clear."}, ], }, + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, {"role": "user", "content": "hello"}, ], id="parts", @@ -266,6 +274,10 @@ def test_nonstreaming_chat_completion_no_prompts( {"type": "text", "text": "Be concise and clear."}, ], }, + { + "role": "user", + "content": "Message demonstrating the absence 
of truncation.", + }, {"role": "user", "content": "hello"}, ] ), @@ -360,6 +372,10 @@ def test_nonstreaming_chat_completion( ] assert "hello" in span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert ( + "Message demonstrating the absence of truncation." + in span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) assert "the model response" in span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] assert span["attributes"]["gen_ai.usage.output_tokens"] == 10 @@ -558,6 +574,10 @@ async def test_nonstreaming_chat_completion_async_no_prompts( "role": "system", "content": "You are a helpful assistant.", }, + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, {"role": "user", "content": "hello"}, ], id="blocks", @@ -571,6 +591,10 @@ async def test_nonstreaming_chat_completion_async_no_prompts( {"type": "text", "text": "Be concise and clear."}, ], }, + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, {"role": "user", "content": "hello"}, ], id="parts", @@ -585,6 +609,10 @@ async def test_nonstreaming_chat_completion_async_no_prompts( {"type": "text", "text": "Be concise and clear."}, ], }, + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, {"role": "user", "content": "hello"}, ] ), @@ -676,6 +704,10 @@ async def test_nonstreaming_chat_completion_async( ] assert "hello" in span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert ( + "Message demonstrating the absence of truncation." 
+ in span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) assert "the model response" in span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] assert span["attributes"]["gen_ai.usage.output_tokens"] == 10 @@ -1353,6 +1385,10 @@ async def test_streaming_chat_completion_async_with_usage_in_stream( "role": "system", "content": "You are a helpful assistant.", }, + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, {"role": "user", "content": "hello"}, ], id="blocks", @@ -1366,6 +1402,10 @@ async def test_streaming_chat_completion_async_with_usage_in_stream( {"type": "text", "text": "Be concise and clear."}, ], }, + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, {"role": "user", "content": "hello"}, ], id="parts", @@ -1380,6 +1420,10 @@ async def test_streaming_chat_completion_async_with_usage_in_stream( {"type": "text", "text": "Be concise and clear."}, ], }, + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, {"role": "user", "content": "hello"}, ] ), @@ -1517,6 +1561,10 @@ def test_streaming_chat_completion( assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "model-id" + assert ( + "Message demonstrating the absence of truncation." 
+ in span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) assert "hello" in span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] assert "hello world" in span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] @@ -1525,12 +1573,12 @@ def test_streaming_chat_completion( if "blocks" in param_id: assert span["attributes"]["gen_ai.usage.output_tokens"] == 2 - assert span["attributes"]["gen_ai.usage.input_tokens"] == 7 - assert span["attributes"]["gen_ai.usage.total_tokens"] == 9 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 15 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 17 else: assert span["attributes"]["gen_ai.usage.output_tokens"] == 2 - assert span["attributes"]["gen_ai.usage.input_tokens"] == 12 - assert span["attributes"]["gen_ai.usage.total_tokens"] == 14 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 22 except ImportError: pass # if tiktoken is not installed, we can't guarantee token usage will be calculated properly @@ -1600,12 +1648,12 @@ def test_streaming_chat_completion( if "blocks" in param_id: assert span["data"]["gen_ai.usage.output_tokens"] == 2 - assert span["data"]["gen_ai.usage.input_tokens"] == 7 - assert span["data"]["gen_ai.usage.total_tokens"] == 9 + assert span["data"]["gen_ai.usage.input_tokens"] == 15 + assert span["data"]["gen_ai.usage.total_tokens"] == 17 else: assert span["data"]["gen_ai.usage.output_tokens"] == 2 - assert span["data"]["gen_ai.usage.input_tokens"] == 12 - assert span["data"]["gen_ai.usage.total_tokens"] == 14 + assert span["data"]["gen_ai.usage.input_tokens"] == 20 + assert span["data"]["gen_ai.usage.total_tokens"] == 22 except ImportError: pass # if tiktoken is not installed, we can't guarantee token usage will be calculated properly @@ -1819,6 +1867,10 @@ async def test_streaming_chat_completion_async_no_prompts( "role": "system", "content": "You are a helpful assistant.", }, + { + "role": "user", + "content": "Message 
demonstrating the absence of truncation.", + }, {"role": "user", "content": "hello"}, ], id="blocks", @@ -1832,6 +1884,10 @@ async def test_streaming_chat_completion_async_no_prompts( {"type": "text", "text": "Be concise and clear."}, ], }, + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, {"role": "user", "content": "hello"}, ], id="parts", @@ -1846,6 +1902,10 @@ async def test_streaming_chat_completion_async_no_prompts( {"type": "text", "text": "Be concise and clear."}, ], }, + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, {"role": "user", "content": "hello"}, ] ), @@ -1976,6 +2036,10 @@ async def test_streaming_chat_completion_async( } ] + assert ( + "Message demonstrating the absence of truncation." + in span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) assert "hello" in span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] assert "hello world" in span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] @@ -1984,12 +2048,12 @@ async def test_streaming_chat_completion_async( if "blocks" in param_id: assert span["attributes"]["gen_ai.usage.output_tokens"] == 2 - assert span["attributes"]["gen_ai.usage.input_tokens"] == 7 - assert span["attributes"]["gen_ai.usage.total_tokens"] == 9 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 15 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 17 else: assert span["attributes"]["gen_ai.usage.output_tokens"] == 2 - assert span["attributes"]["gen_ai.usage.input_tokens"] == 12 - assert span["attributes"]["gen_ai.usage.total_tokens"] == 14 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 22 except ImportError: pass # if tiktoken is not installed, we can't guarantee token usage will be calculated properly @@ -2007,6 +2071,10 @@ async def test_streaming_chat_completion_async( }, ] + assert ( + "Message demonstrating the absence of truncation." 
+ in span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) assert "hello" in span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] assert "hello world" in span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] @@ -2015,12 +2083,12 @@ async def test_streaming_chat_completion_async( if "blocks" in param_id: assert span["attributes"]["gen_ai.usage.output_tokens"] == 2 - assert span["attributes"]["gen_ai.usage.input_tokens"] == 7 - assert span["attributes"]["gen_ai.usage.total_tokens"] == 9 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 15 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 17 else: assert span["attributes"]["gen_ai.usage.output_tokens"] == 2 - assert span["attributes"]["gen_ai.usage.input_tokens"] == 12 - assert span["attributes"]["gen_ai.usage.total_tokens"] == 14 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 22 except ImportError: pass # if tiktoken is not installed, we can't guarantee token usage will be calculated properly @@ -2092,12 +2160,12 @@ async def test_streaming_chat_completion_async( if "blocks" in param_id: assert span["data"]["gen_ai.usage.output_tokens"] == 2 - assert span["data"]["gen_ai.usage.input_tokens"] == 7 - assert span["data"]["gen_ai.usage.total_tokens"] == 9 + assert span["data"]["gen_ai.usage.input_tokens"] == 15 + assert span["data"]["gen_ai.usage.total_tokens"] == 17 else: assert span["data"]["gen_ai.usage.output_tokens"] == 2 - assert span["data"]["gen_ai.usage.input_tokens"] == 12 - assert span["data"]["gen_ai.usage.total_tokens"] == 14 + assert span["data"]["gen_ai.usage.input_tokens"] == 20 + assert span["data"]["gen_ai.usage.total_tokens"] == 22 except ImportError: pass # if tiktoken is not installed, we can't guarantee token usage will be calculated properly @@ -3736,6 +3804,10 @@ def test_ai_client_span_responses_api_no_pii( "role": "system", "content": "You are a helpful assistant.", }, + { + "role": "user", + 
"content": "Message demonstrating the absence of truncation.", + }, {"role": "user", "content": "hello"}, ], id="blocks_no_type", @@ -3747,6 +3819,11 @@ def test_ai_client_span_responses_api_no_pii( "role": "system", "content": "You are a helpful assistant.", }, + { + "type": "message", + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, {"type": "message", "role": "user", "content": "hello"}, ], id="blocks", @@ -3760,6 +3837,10 @@ def test_ai_client_span_responses_api_no_pii( {"type": "text", "text": "Be concise and clear."}, ], }, + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, {"role": "user", "content": "hello"}, ], id="parts_no_type", @@ -3774,6 +3855,11 @@ def test_ai_client_span_responses_api_no_pii( {"type": "text", "text": "Be concise and clear."}, ], }, + { + "type": "message", + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, {"type": "message", "role": "user", "content": "hello"}, ], id="parts", @@ -3880,7 +3966,13 @@ def test_ai_client_span_responses_api( [{"type": "text", "content": "You are a helpful assistant."}] ), "gen_ai.request.messages": safe_serialize( - [{"role": "user", "content": "hello"}] + [ + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, + {"role": "user", "content": "hello"}, + ] ), } ) @@ -3897,7 +3989,13 @@ def test_ai_client_span_responses_api( ] ), "gen_ai.request.messages": safe_serialize( - [{"role": "user", "content": "hello"}] + [ + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, + {"role": "user", "content": "hello"}, + ] ), } ) @@ -3910,7 +4008,14 @@ def test_ai_client_span_responses_api( [{"type": "text", "content": "You are a helpful assistant."}] ), "gen_ai.request.messages": safe_serialize( - [{"type": "message", "role": "user", "content": "hello"}] + [ + { + "type": "message", + "role": "user", + "content": "Message 
demonstrating the absence of truncation.", + }, + {"type": "message", "role": "user", "content": "hello"}, + ] ), } ) @@ -3927,7 +4032,14 @@ def test_ai_client_span_responses_api( ] ), "gen_ai.request.messages": safe_serialize( - [{"type": "message", "role": "user", "content": "hello"}] + [ + { + "type": "message", + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, + {"type": "message", "role": "user", "content": "hello"}, + ] ), } ) @@ -3943,7 +4055,13 @@ def test_ai_client_span_responses_api( ] ), "gen_ai.request.messages": safe_serialize( - [{"role": "user", "content": "hello"}] + [ + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, + {"role": "user", "content": "hello"}, + ] ), } ) @@ -3961,7 +4079,13 @@ def test_ai_client_span_responses_api( ] ), "gen_ai.request.messages": safe_serialize( - [{"role": "user", "content": "hello"}] + [ + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, + {"role": "user", "content": "hello"}, + ] ), } ) @@ -3975,7 +4099,14 @@ def test_ai_client_span_responses_api( ] ), "gen_ai.request.messages": safe_serialize( - [{"type": "message", "role": "user", "content": "hello"}] + [ + { + "type": "message", + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, + {"type": "message", "role": "user", "content": "hello"}, + ] ), } ) @@ -3993,7 +4124,14 @@ def test_ai_client_span_responses_api( ] ), "gen_ai.request.messages": safe_serialize( - [{"type": "message", "role": "user", "content": "hello"}] + [ + { + "type": "message", + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, + {"type": "message", "role": "user", "content": "hello"}, + ] ), } ) @@ -4348,6 +4486,10 @@ def test_error_in_responses_api( "role": "system", "content": "You are a helpful assistant.", }, + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, 
{"role": "user", "content": "hello"}, ], id="blocks_no_type", @@ -4359,6 +4501,11 @@ def test_error_in_responses_api( "role": "system", "content": "You are a helpful assistant.", }, + { + "type": "message", + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, {"type": "message", "role": "user", "content": "hello"}, ], id="blocks", @@ -4372,6 +4519,10 @@ def test_error_in_responses_api( {"type": "text", "text": "Be concise and clear."}, ], }, + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, {"role": "user", "content": "hello"}, ], id="parts_no_type", @@ -4386,6 +4537,11 @@ def test_error_in_responses_api( {"type": "text", "text": "Be concise and clear."}, ], }, + { + "type": "message", + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, {"type": "message", "role": "user", "content": "hello"}, ], id="parts", @@ -4492,7 +4648,13 @@ async def test_ai_client_span_responses_async_api( [{"type": "text", "content": "You are a helpful assistant."}] ), "gen_ai.request.messages": safe_serialize( - [{"role": "user", "content": "hello"}] + [ + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, + {"role": "user", "content": "hello"}, + ] ), } ) @@ -4509,7 +4671,13 @@ async def test_ai_client_span_responses_async_api( ] ), "gen_ai.request.messages": safe_serialize( - [{"role": "user", "content": "hello"}] + [ + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, + {"role": "user", "content": "hello"}, + ] ), } ) @@ -4522,7 +4690,14 @@ async def test_ai_client_span_responses_async_api( [{"type": "text", "content": "You are a helpful assistant."}] ), "gen_ai.request.messages": safe_serialize( - [{"type": "message", "role": "user", "content": "hello"}] + [ + { + "type": "message", + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, + {"type": "message", 
"role": "user", "content": "hello"}, + ] ), } ) @@ -4539,7 +4714,14 @@ async def test_ai_client_span_responses_async_api( ] ), "gen_ai.request.messages": safe_serialize( - [{"type": "message", "role": "user", "content": "hello"}] + [ + { + "type": "message", + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, + {"type": "message", "role": "user", "content": "hello"}, + ] ), } ) @@ -4555,7 +4737,13 @@ async def test_ai_client_span_responses_async_api( ] ), "gen_ai.request.messages": safe_serialize( - [{"role": "user", "content": "hello"}] + [ + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, + {"role": "user", "content": "hello"}, + ] ), } ) @@ -4573,7 +4761,13 @@ async def test_ai_client_span_responses_async_api( ] ), "gen_ai.request.messages": safe_serialize( - [{"role": "user", "content": "hello"}] + [ + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, + {"role": "user", "content": "hello"}, + ] ), } ) @@ -4587,7 +4781,14 @@ async def test_ai_client_span_responses_async_api( ] ), "gen_ai.request.messages": safe_serialize( - [{"type": "message", "role": "user", "content": "hello"}] + [ + { + "type": "message", + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, + {"type": "message", "role": "user", "content": "hello"}, + ] ), } ) @@ -4605,7 +4806,14 @@ async def test_ai_client_span_responses_async_api( ] ), "gen_ai.request.messages": safe_serialize( - [{"type": "message", "role": "user", "content": "hello"}] + [ + { + "type": "message", + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, + {"type": "message", "role": "user", "content": "hello"}, + ] ), } ) @@ -4830,6 +5038,10 @@ async def test_ai_client_span_responses_async_api( "role": "system", "content": "You are a helpful assistant.", }, + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", 
+ }, {"role": "user", "content": "hello"}, ], id="blocks_no_type", @@ -4841,6 +5053,11 @@ async def test_ai_client_span_responses_async_api( "role": "system", "content": "You are a helpful assistant.", }, + { + "type": "message", + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, {"type": "message", "role": "user", "content": "hello"}, ], id="blocks", @@ -4854,6 +5071,10 @@ async def test_ai_client_span_responses_async_api( {"type": "text", "text": "Be concise and clear."}, ], }, + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, {"role": "user", "content": "hello"}, ], id="parts_no_type", @@ -4868,6 +5089,11 @@ async def test_ai_client_span_responses_async_api( {"type": "text", "text": "Be concise and clear."}, ], }, + { + "type": "message", + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, {"type": "message", "role": "user", "content": "hello"}, ], id="parts", @@ -4992,7 +5218,13 @@ async def test_ai_client_span_streaming_responses_async_api( [{"type": "text", "content": "You are a helpful assistant."}] ), "gen_ai.request.messages": safe_serialize( - [{"role": "user", "content": "hello"}] + [ + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, + {"role": "user", "content": "hello"}, + ] ), } ) @@ -5009,7 +5241,13 @@ async def test_ai_client_span_streaming_responses_async_api( ] ), "gen_ai.request.messages": safe_serialize( - [{"role": "user", "content": "hello"}] + [ + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, + {"role": "user", "content": "hello"}, + ] ), } ) @@ -5022,7 +5260,14 @@ async def test_ai_client_span_streaming_responses_async_api( [{"type": "text", "content": "You are a helpful assistant."}] ), "gen_ai.request.messages": safe_serialize( - [{"type": "message", "role": "user", "content": "hello"}] + [ + { + "type": "message", + "role": "user", + 
"content": "Message demonstrating the absence of truncation.", + }, + {"type": "message", "role": "user", "content": "hello"}, + ] ), } ) @@ -5039,7 +5284,14 @@ async def test_ai_client_span_streaming_responses_async_api( ] ), "gen_ai.request.messages": safe_serialize( - [{"type": "message", "role": "user", "content": "hello"}] + [ + { + "type": "message", + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, + {"type": "message", "role": "user", "content": "hello"}, + ] ), } ) @@ -5055,7 +5307,13 @@ async def test_ai_client_span_streaming_responses_async_api( ] ), "gen_ai.request.messages": safe_serialize( - [{"role": "user", "content": "hello"}] + [ + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, + {"role": "user", "content": "hello"}, + ] ), } ) @@ -5073,7 +5331,13 @@ async def test_ai_client_span_streaming_responses_async_api( ] ), "gen_ai.request.messages": safe_serialize( - [{"role": "user", "content": "hello"}] + [ + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, + {"role": "user", "content": "hello"}, + ] ), } ) @@ -5087,7 +5351,14 @@ async def test_ai_client_span_streaming_responses_async_api( ] ), "gen_ai.request.messages": safe_serialize( - [{"type": "message", "role": "user", "content": "hello"}] + [ + { + "type": "message", + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, + {"type": "message", "role": "user", "content": "hello"}, + ] ), } ) @@ -5105,7 +5376,14 @@ async def test_ai_client_span_streaming_responses_async_api( ] ), "gen_ai.request.messages": safe_serialize( - [{"type": "message", "role": "user", "content": "hello"}] + [ + { + "type": "message", + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, + {"type": "message", "role": "user", "content": "hello"}, + ] ), } ) diff --git a/tests/integrations/openai_agents/test_openai_agents.py 
b/tests/integrations/openai_agents/test_openai_agents.py index beb44471de..5589352e5c 100644 --- a/tests/integrations/openai_agents/test_openai_agents.py +++ b/tests/integrations/openai_agents/test_openai_agents.py @@ -313,6 +313,10 @@ async def test_agent_invocation_span_no_pii( "role": "system", "content": "You are a helpful assistant.", }, + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, { "role": "user", "content": "Test input", @@ -327,6 +331,10 @@ async def test_agent_invocation_span_no_pii( "role": "system", "content": "You are a helpful assistant.", }, + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, { "type": "message", "role": "user", @@ -344,6 +352,10 @@ async def test_agent_invocation_span_no_pii( {"type": "text", "text": "Be concise and clear."}, ], }, + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, { "role": "user", "content": "Test input", @@ -361,6 +373,10 @@ async def test_agent_invocation_span_no_pii( {"type": "text", "text": "Be concise and clear."}, ], }, + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, { "type": "message", "role": "user", @@ -462,6 +478,21 @@ async def test_agent_invocation_span( {"type": "text", "content": "You are a helpful assistant."}, ] ) + + assert json.loads( + ai_client_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) == [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Message demonstrating the absence of truncation.", + } + ], + }, + {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, + ] elif "blocks_no_type" in param_id: assert ai_client_span["attributes"][ "gen_ai.system_instructions" @@ -474,6 +505,21 @@ async def test_agent_invocation_span( {"type": "text", "content": "You are a helpful assistant."}, ] ) + + assert json.loads( + ai_client_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) 
== [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Message demonstrating the absence of truncation.", + } + ], + }, + {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, + ] elif "blocks" in param_id and instructions is None: # type: ignore assert ai_client_span["attributes"][ "gen_ai.system_instructions" @@ -482,6 +528,21 @@ async def test_agent_invocation_span( {"type": "text", "content": "You are a helpful assistant."}, ] ) + + assert json.loads( + ai_client_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) == [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Message demonstrating the absence of truncation.", + } + ], + }, + {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, + ] elif "blocks" in param_id: assert ai_client_span["attributes"][ "gen_ai.system_instructions" @@ -494,6 +555,21 @@ async def test_agent_invocation_span( {"type": "text", "content": "You are a helpful assistant."}, ] ) + + assert json.loads( + ai_client_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) == [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Message demonstrating the absence of truncation.", + } + ], + }, + {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, + ] elif "parts_no_type" in param_id and instructions is None: assert ai_client_span["attributes"][ "gen_ai.system_instructions" @@ -503,6 +579,21 @@ async def test_agent_invocation_span( {"type": "text", "content": "Be concise and clear."}, ] ) + + assert json.loads( + ai_client_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) == [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Message demonstrating the absence of truncation.", + } + ], + }, + {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, + ] elif "parts_no_type" in param_id: assert ai_client_span["attributes"][ "gen_ai.system_instructions" @@ -516,6 +607,21 @@ async def 
test_agent_invocation_span( {"type": "text", "content": "Be concise and clear."}, ] ) + + assert json.loads( + ai_client_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) == [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Message demonstrating the absence of truncation.", + } + ], + }, + {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, + ] elif instructions is None: # type: ignore assert ai_client_span["attributes"][ "gen_ai.system_instructions" @@ -525,6 +631,21 @@ async def test_agent_invocation_span( {"type": "text", "content": "Be concise and clear."}, ] ) + + assert json.loads( + ai_client_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) == [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Message demonstrating the absence of truncation.", + } + ], + }, + {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, + ] else: assert ai_client_span["attributes"][ "gen_ai.system_instructions" @@ -539,6 +660,21 @@ async def test_agent_invocation_span( ] ) + assert json.loads( + ai_client_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) == [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Message demonstrating the absence of truncation.", + } + ], + }, + {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, + ] + assert ( invoke_agent_span["attributes"]["gen_ai.response.text"] == "Hello, how can I help you?" 
@@ -960,6 +1096,10 @@ def test_agent_invocation_span_sync_no_pii( "role": "system", "content": "You are a helpful assistant.", }, + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, { "role": "user", "content": "Test input", @@ -974,6 +1114,11 @@ def test_agent_invocation_span_sync_no_pii( "role": "system", "content": "You are a helpful assistant.", }, + { + "type": "message", + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, { "type": "message", "role": "user", @@ -991,6 +1136,10 @@ def test_agent_invocation_span_sync_no_pii( {"type": "text", "text": "Be concise and clear."}, ], }, + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, { "role": "user", "content": "Test input", @@ -1008,6 +1157,11 @@ def test_agent_invocation_span_sync_no_pii( {"type": "text", "text": "Be concise and clear."}, ], }, + { + "type": "message", + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, { "type": "message", "role": "user", @@ -1114,6 +1268,21 @@ def test_agent_invocation_span_sync( {"type": "text", "content": "You are a helpful assistant."}, ] ) + + assert json.loads( + ai_client_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) == [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Message demonstrating the absence of truncation.", + } + ], + }, + {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, + ] elif "blocks_no_type" in param_id: assert ai_client_span["attributes"][ "gen_ai.system_instructions" @@ -1126,6 +1295,21 @@ def test_agent_invocation_span_sync( {"type": "text", "content": "You are a helpful assistant."}, ] ) + + assert json.loads( + ai_client_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) == [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Message demonstrating the absence of truncation.", + } + ], + }, + {"role": "user", "content": 
[{"type": "text", "text": "Test input"}]}, + ] elif "blocks" in param_id and instructions is None: # type: ignore assert ai_client_span["attributes"][ "gen_ai.system_instructions" @@ -1134,6 +1318,21 @@ def test_agent_invocation_span_sync( {"type": "text", "content": "You are a helpful assistant."}, ] ) + + assert json.loads( + ai_client_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) == [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Message demonstrating the absence of truncation.", + } + ], + }, + {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, + ] elif "blocks" in param_id: assert ai_client_span["attributes"][ "gen_ai.system_instructions" @@ -1146,6 +1345,21 @@ def test_agent_invocation_span_sync( {"type": "text", "content": "You are a helpful assistant."}, ] ) + + assert json.loads( + ai_client_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) == [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Message demonstrating the absence of truncation.", + } + ], + }, + {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, + ] elif "parts_no_type" in param_id and instructions is None: assert ai_client_span["attributes"][ "gen_ai.system_instructions" @@ -1155,6 +1369,21 @@ def test_agent_invocation_span_sync( {"type": "text", "content": "Be concise and clear."}, ] ) + + assert json.loads( + ai_client_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) == [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Message demonstrating the absence of truncation.", + } + ], + }, + {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, + ] elif "parts_no_type" in param_id: assert ai_client_span["attributes"][ "gen_ai.system_instructions" @@ -1168,6 +1397,21 @@ def test_agent_invocation_span_sync( {"type": "text", "content": "Be concise and clear."}, ] ) + + assert json.loads( + ai_client_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] 
+ ) == [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Message demonstrating the absence of truncation.", + } + ], + }, + {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, + ] elif instructions is None: # type: ignore assert ai_client_span["attributes"][ "gen_ai.system_instructions" @@ -1177,6 +1421,21 @@ def test_agent_invocation_span_sync( {"type": "text", "content": "Be concise and clear."}, ] ) + + assert json.loads( + ai_client_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) == [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Message demonstrating the absence of truncation.", + } + ], + }, + {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, + ] else: assert ai_client_span["attributes"][ "gen_ai.system_instructions" @@ -1190,6 +1449,21 @@ def test_agent_invocation_span_sync( {"type": "text", "content": "Be concise and clear."}, ] ) + + assert json.loads( + ai_client_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) == [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Message demonstrating the absence of truncation.", + } + ], + }, + {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, + ] else: with patch.object( agent.model._client._client, diff --git a/tests/integrations/pydantic_ai/test_pydantic_ai.py b/tests/integrations/pydantic_ai/test_pydantic_ai.py index 42a666644e..bcfb9f1df8 100644 --- a/tests/integrations/pydantic_ai/test_pydantic_ai.py +++ b/tests/integrations/pydantic_ai/test_pydantic_ai.py @@ -76,7 +76,9 @@ async def test_agent_run_async( if stream_gen_ai_spans: items = capture_items("transaction", "span") - result = await test_agent.run("Test input") + result = await test_agent.run( + ["Message demonstrating the absence of truncation.", "Test input"] + ) assert result is not None assert result.output is not None @@ -102,7 +104,23 @@ async def test_agent_run_async( assert "chat" in chat_span["name"] 
assert chat_span["attributes"]["gen_ai.operation.name"] == "chat" assert chat_span["attributes"]["gen_ai.response.streaming"] is False - assert "gen_ai.request.messages" in chat_span["attributes"] + assert json.loads( + chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) == [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Message demonstrating the absence of truncation.", + }, + { + "type": "text", + "text": "Test input", + }, + ], + } + ] assert "gen_ai.usage.input_tokens" in chat_span["attributes"] assert "gen_ai.usage.output_tokens" in chat_span["attributes"] else: @@ -275,7 +293,9 @@ def test_agent_run_sync( if stream_gen_ai_spans: items = capture_items("transaction", "span") - result = test_agent.run_sync("Test input") + result = test_agent.run_sync( + ["Message demonstrating the absence of truncation.", "Test input"] + ) assert result is not None assert result.output is not None @@ -394,7 +414,9 @@ async def test_agent_run_stream( if stream_gen_ai_spans: items = capture_items("transaction", "span") - async with test_agent.run_stream("Test input") as result: + async with test_agent.run_stream( + ["Message demonstrating the absence of truncation.", "Test input"] + ) as result: # Consume the stream async for _ in result.stream_output(): pass @@ -416,7 +438,23 @@ async def test_agent_run_stream( # Verify streaming flag is True for streaming for chat_span in chat_spans: assert chat_span["attributes"]["gen_ai.response.streaming"] is True - assert "gen_ai.request.messages" in chat_span["attributes"] + assert json.loads( + chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) == [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Message demonstrating the absence of truncation.", + }, + { + "type": "text", + "text": "Test input", + }, + ], + } + ] assert "gen_ai.usage.input_tokens" in chat_span["attributes"] # Streaming responses should still have output data assert ( @@ -479,7 +517,9 @@ async def 
test_agent_run_stream_events( if stream_gen_ai_spans: items = capture_items("transaction", "span") - async for _ in test_agent.run_stream_events("Test input"): + async for _ in test_agent.run_stream_events( + ["Message demonstrating the absence of truncation.", "Test input"] + ): pass # Verify transaction From 425ae279029edef423e2f1d466487250002da497 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Wed, 13 May 2026 13:39:18 +0200 Subject: [PATCH 82/84] cleanup one openai test --- tests/integrations/openai/test_openai.py | 46 ++++++++---------------- 1 file changed, 14 insertions(+), 32 deletions(-) diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index 6c113078a3..934a0b8f4e 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -1975,24 +1975,6 @@ async def test_streaming_chat_completion_async( "content": "You are a helpful assistant.", } ] - - assert "hello" in span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert "hello world" in span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] - - try: - import tiktoken # type: ignore # noqa # pylint: disable=unused-import - - if "blocks" in param_id: - assert span["attributes"]["gen_ai.usage.output_tokens"] == 2 - assert span["attributes"]["gen_ai.usage.input_tokens"] == 7 - assert span["attributes"]["gen_ai.usage.total_tokens"] == 9 - else: - assert span["attributes"]["gen_ai.usage.output_tokens"] == 2 - assert span["attributes"]["gen_ai.usage.input_tokens"] == 12 - assert span["attributes"]["gen_ai.usage.total_tokens"] == 14 - - except ImportError: - pass # if tiktoken is not installed, we can't guarantee token usage will be calculated properly else: assert json.loads( span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] @@ -2007,23 +1989,23 @@ async def test_streaming_chat_completion_async( }, ] - assert "hello" in span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert "hello world" in 
span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] + assert "hello" in span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert "hello world" in span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] - try: - import tiktoken # type: ignore # noqa # pylint: disable=unused-import + try: + import tiktoken # type: ignore # noqa # pylint: disable=unused-import - if "blocks" in param_id: - assert span["attributes"]["gen_ai.usage.output_tokens"] == 2 - assert span["attributes"]["gen_ai.usage.input_tokens"] == 7 - assert span["attributes"]["gen_ai.usage.total_tokens"] == 9 - else: - assert span["attributes"]["gen_ai.usage.output_tokens"] == 2 - assert span["attributes"]["gen_ai.usage.input_tokens"] == 12 - assert span["attributes"]["gen_ai.usage.total_tokens"] == 14 + if "blocks" in param_id: + assert span["attributes"]["gen_ai.usage.output_tokens"] == 2 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 7 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 9 + else: + assert span["attributes"]["gen_ai.usage.output_tokens"] == 2 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 12 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 14 - except ImportError: - pass # if tiktoken is not installed, we can't guarantee token usage will be calculated properly + except ImportError: + pass # if tiktoken is not installed, we can't guarantee token usage will be calculated properly else: events = capture_events() From 47680da69e4481059c00c7707fe9172169144264 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Wed, 13 May 2026 13:43:56 +0200 Subject: [PATCH 83/84] add message in openai_agents tests --- tests/integrations/openai_agents/test_openai_agents.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py index 5589352e5c..430d60ac01 100644 --- a/tests/integrations/openai_agents/test_openai_agents.py +++ 
b/tests/integrations/openai_agents/test_openai_agents.py @@ -332,6 +332,7 @@ async def test_agent_invocation_span_no_pii( "content": "You are a helpful assistant.", }, { + "type": "message", "role": "user", "content": "Message demonstrating the absence of truncation.", }, @@ -374,6 +375,7 @@ async def test_agent_invocation_span_no_pii( ], }, { + "type": "message", "role": "user", "content": "Message demonstrating the absence of truncation.", }, From 7f01f968e5d3af6bdf474265a6a15000f94acb17 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Wed, 13 May 2026 14:54:38 +0200 Subject: [PATCH 84/84] merge follow up --- .../integrations/anthropic/test_anthropic.py | 325 ++++-------------- .../google_genai/test_google_genai.py | 166 ++------- .../integrations/langchain/test_langchain.py | 137 +++----- .../integrations/langgraph/test_langgraph.py | 86 ++--- tests/integrations/litellm/test_litellm.py | 103 ++---- 5 files changed, 201 insertions(+), 616 deletions(-) diff --git a/tests/integrations/anthropic/test_anthropic.py b/tests/integrations/anthropic/test_anthropic.py index 0c5d110827..d6b2c269d9 100644 --- a/tests/integrations/anthropic/test_anthropic.py +++ b/tests/integrations/anthropic/test_anthropic.py @@ -3685,20 +3685,14 @@ def mock_messages_create(*args, **kwargs): assert stored_messages[0]["role"] == expected_role -@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) -def test_anthropic_message_truncation( - sentry_init, - capture_events, - capture_items, - stream_gen_ai_spans, -): +def test_anthropic_message_truncation(sentry_init, capture_events): """Test that large messages are truncated properly in Anthropic integration.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - stream_gen_ai_spans=stream_gen_ai_spans, ) + events = capture_events() client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -3714,82 +3708,43 @@ def 
test_anthropic_message_truncation( {"role": "user", "content": "small message 5"}, ] - if stream_gen_ai_spans: - items = capture_items("transaction", "span") - - with start_transaction(): - client.messages.create(max_tokens=1024, messages=messages, model="model") - - spans = [item.payload for item in items if item.type == "span"] - chat_spans = [ - span - for span in spans - if span["attributes"].get("sentry.op") == OP.GEN_AI_CHAT - ] - - assert len(chat_spans) > 0 - - chat_span = chat_spans[0] - assert chat_span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert chat_span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["attributes"] - - messages_data = chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert isinstance(messages_data, str) - - parsed_messages = json.loads(messages_data) - assert isinstance(parsed_messages, list) - assert len(parsed_messages) == 1 - assert "small message 5" in str(parsed_messages[0]) - - tx = next(item.payload for item in items if item.type == "transaction") - else: - events = capture_events() - - with start_transaction(): - client.messages.create(max_tokens=1024, messages=messages, model="model") + with start_transaction(): + client.messages.create(max_tokens=1024, messages=messages, model="model") - assert len(events) > 0 - tx = events[0] - assert tx["type"] == "transaction" + assert len(events) > 0 + tx = events[0] + assert tx["type"] == "transaction" - chat_spans = [ - span for span in tx.get("spans", []) if span.get("op") == OP.GEN_AI_CHAT - ] - - assert len(chat_spans) > 0 + chat_spans = [ + span for span in tx.get("spans", []) if span.get("op") == OP.GEN_AI_CHAT + ] + assert len(chat_spans) > 0 - chat_span = chat_spans[0] - assert chat_span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert chat_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["data"] + chat_span = chat_spans[0] 
+ assert chat_span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert chat_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["data"] - messages_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert isinstance(messages_data, str) + messages_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert isinstance(messages_data, str) - parsed_messages = json.loads(messages_data) - assert isinstance(parsed_messages, list) - assert len(parsed_messages) == 1 - assert "small message 5" in str(parsed_messages[0]) + parsed_messages = json.loads(messages_data) + assert isinstance(parsed_messages, list) + assert len(parsed_messages) == 1 + assert "small message 5" in str(parsed_messages[0]) assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5 -@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio -async def test_anthropic_message_truncation_async( - sentry_init, - capture_events, - capture_items, - stream_gen_ai_spans, -): +async def test_anthropic_message_truncation_async(sentry_init, capture_events): """Test that large messages are truncated properly in Anthropic integration.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - stream_gen_ai_spans=stream_gen_ai_spans, ) + events = capture_events() client = AsyncAnthropic(api_key="z") client.messages._post = mock.AsyncMock(return_value=EXAMPLE_MESSAGE) @@ -3805,68 +3760,30 @@ async def test_anthropic_message_truncation_async( {"role": "user", "content": "small message 5"}, ] - if stream_gen_ai_spans: - items = capture_items("transaction", "span") - - with start_transaction(): - await client.messages.create( - max_tokens=1024, messages=messages, model="model" - ) - - spans = [item.payload for item in items if item.type == "span"] - chat_spans = [ - span - for span in spans - if span["attributes"].get("sentry.op") 
== OP.GEN_AI_CHAT - ] - assert len(chat_spans) > 0 - - chat_span = chat_spans[0] - - assert chat_span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert chat_span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["attributes"] - - messages_data = chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + with start_transaction(): + await client.messages.create(max_tokens=1024, messages=messages, model="model") - assert isinstance(messages_data, str) + assert len(events) > 0 + tx = events[0] + assert tx["type"] == "transaction" - parsed_messages = json.loads(messages_data) - assert isinstance(parsed_messages, list) - assert len(parsed_messages) == 1 - assert "small message 5" in str(parsed_messages[0]) - - tx = next(item.payload for item in items if item.type == "transaction") - else: - events = capture_events() - - with start_transaction(): - await client.messages.create( - max_tokens=1024, messages=messages, model="model" - ) - - assert len(events) > 0 - tx = events[0] - assert tx["type"] == "transaction" - - chat_spans = [ - span for span in tx.get("spans", []) if span.get("op") == OP.GEN_AI_CHAT - ] - assert len(chat_spans) > 0 - - chat_span = chat_spans[0] + chat_spans = [ + span for span in tx.get("spans", []) if span.get("op") == OP.GEN_AI_CHAT + ] + assert len(chat_spans) > 0 - assert chat_span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert chat_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["data"] + chat_span = chat_spans[0] + assert chat_span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert chat_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["data"] - messages_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert isinstance(messages_data, str) + messages_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert 
isinstance(messages_data, str) - parsed_messages = json.loads(messages_data) - assert isinstance(parsed_messages, list) - assert len(parsed_messages) == 1 - assert "small message 5" in str(parsed_messages[0]) + parsed_messages = json.loads(messages_data) + assert isinstance(parsed_messages, list) + assert len(parsed_messages) == 1 + assert "small message 5" in str(parsed_messages[0]) assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5 @@ -5260,21 +5177,14 @@ def test_transform_message_content_list_anthropic(): # Integration tests for binary data in messages -@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) -def test_message_with_base64_image( - sentry_init, - capture_events, - capture_items, - stream_gen_ai_spans, -): +def test_message_with_base64_image(sentry_init, capture_events): """Test that messages with base64 images are properly captured.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - stream_gen_ai_spans=stream_gen_ai_spans, ) - + events = capture_events() client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -5295,31 +5205,15 @@ def test_message_with_base64_image( } ] - if stream_gen_ai_spans: - items = capture_items("transaction", "span") - - with start_transaction(name="anthropic"): - client.messages.create(max_tokens=1024, messages=messages, model="model") - - spans = [item.payload for item in items if item.type == "span"] - (span,) = spans - - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] - stored_messages = json.loads( - span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - ) - else: - events = capture_events() - - with start_transaction(name="anthropic"): - client.messages.create(max_tokens=1024, messages=messages, model="model") + with start_transaction(name="anthropic"): + client.messages.create(max_tokens=1024, messages=messages, model="model") - assert len(events) 
== 1 - (event,) = events - (span,) = event["spans"] + assert len(events) == 1 + (event,) = events + (span,) = event["spans"] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] - stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] + stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(stored_messages) == 1 assert stored_messages[0]["role"] == "user" @@ -5469,21 +5363,14 @@ def test_message_with_file_image( } -@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) -def test_message_with_base64_pdf( - sentry_init, - capture_events, - capture_items, - stream_gen_ai_spans, -): +def test_message_with_base64_pdf(sentry_init, capture_events): """Test that messages with base64-encoded PDF documents are properly captured.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - stream_gen_ai_spans=stream_gen_ai_spans, ) - + events = capture_events() client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -5504,30 +5391,14 @@ def test_message_with_base64_pdf( } ] - if stream_gen_ai_spans: - items = capture_items("transaction", "span") - - with start_transaction(name="anthropic"): - client.messages.create(max_tokens=1024, messages=messages, model="model") + with start_transaction(name="anthropic"): + client.messages.create(max_tokens=1024, messages=messages, model="model") - spans = [item.payload for item in items if item.type == "span"] - (span,) = spans - - stored_messages = json.loads( - span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - ) - else: - events = capture_events() - - with start_transaction(name="anthropic"): - client.messages.create(max_tokens=1024, messages=messages, model="model") - - assert len(events) == 1 - (event,) = events - (span,) = event["spans"] - - stored_messages = 
json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + assert len(events) == 1 + (event,) = events + (span,) = event["spans"] + stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) content = stored_messages[0]["content"] assert content[1] == { "type": "blob", @@ -5672,21 +5543,14 @@ def test_message_with_file_document( } -@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) -def test_message_with_mixed_content( - sentry_init, - capture_events, - capture_items, - stream_gen_ai_spans, -): +def test_message_with_mixed_content(sentry_init, capture_events): """Test that messages with mixed content (text, images, documents) are properly captured.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - stream_gen_ai_spans=stream_gen_ai_spans, ) - + events = capture_events() client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -5723,30 +5587,14 @@ def test_message_with_mixed_content( } ] - if stream_gen_ai_spans: - items = capture_items("transaction", "span") + with start_transaction(name="anthropic"): + client.messages.create(max_tokens=1024, messages=messages, model="model") - with start_transaction(name="anthropic"): - client.messages.create(max_tokens=1024, messages=messages, model="model") - - spans = [item.payload for item in items if item.type == "span"] - (span,) = spans - - stored_messages = json.loads( - span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - ) - else: - events = capture_events() - - with start_transaction(name="anthropic"): - client.messages.create(max_tokens=1024, messages=messages, model="model") - - assert len(events) == 1 - (event,) = events - (span,) = event["spans"] - - stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + assert len(events) == 1 + (event,) = events + (span,) = event["spans"] + stored_messages = 
json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) content = stored_messages[0]["content"] assert len(content) == 5 @@ -5778,21 +5626,14 @@ def test_message_with_mixed_content( } -@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) -def test_message_with_multiple_images_different_formats( - sentry_init, - capture_events, - capture_items, - stream_gen_ai_spans, -): +def test_message_with_multiple_images_different_formats(sentry_init, capture_events): """Test that messages with multiple images of different source types are handled.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - stream_gen_ai_spans=stream_gen_ai_spans, ) - + events = capture_events() client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -5828,30 +5669,14 @@ def test_message_with_multiple_images_different_formats( } ] - if stream_gen_ai_spans: - items = capture_items("transaction", "span") - - with start_transaction(name="anthropic"): - client.messages.create(max_tokens=1024, messages=messages, model="model") - - spans = [item.payload for item in items if item.type == "span"] - (span,) = spans - - stored_messages = json.loads( - span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - ) - else: - events = capture_events() - - with start_transaction(name="anthropic"): - client.messages.create(max_tokens=1024, messages=messages, model="model") + with start_transaction(name="anthropic"): + client.messages.create(max_tokens=1024, messages=messages, model="model") - assert len(events) == 1 - (event,) = events - (span,) = event["spans"] - - stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + assert len(events) == 1 + (event,) = events + (span,) = event["spans"] + stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) content = stored_messages[0]["content"] assert len(content) == 4 diff --git 
a/tests/integrations/google_genai/test_google_genai.py b/tests/integrations/google_genai/test_google_genai.py index 69287afb61..723a71959d 100644 --- a/tests/integrations/google_genai/test_google_genai.py +++ b/tests/integrations/google_genai/test_google_genai.py @@ -1436,21 +1436,16 @@ def test_tool_calls_extraction( assert json.loads(tool_calls[1]["arguments"]) == {"timezone": "PST"} -@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_google_genai_message_truncation( - sentry_init, - capture_events, - capture_items, - mock_genai_client, - stream_gen_ai_spans, + sentry_init, capture_events, mock_genai_client ): """Test that large messages are truncated properly in Google GenAI integration.""" sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - stream_gen_ai_spans=stream_gen_ai_spans, ) + events = capture_events() large_content = ( "This is a very long message that will exceed our size limits. " * 1000 @@ -1459,39 +1454,21 @@ def test_google_genai_message_truncation( mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) - if stream_gen_ai_spans: - items = capture_items("span") - - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ), start_transaction(name="google_genai"): - mock_genai_client.models.generate_content( - model="gemini-1.5-flash", - contents=[large_content, small_content], - config=create_test_config(), - ) - - invoke_span = next(item.payload for item in items if item.type == "span") - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["attributes"] - - messages_data = invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - else: - events = capture_events() - - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ), start_transaction(name="google_genai"): + with mock.patch.object( + mock_genai_client._api_client, "request", 
return_value=mock_http_response + ): + with start_transaction(name="google_genai"): mock_genai_client.models.generate_content( model="gemini-1.5-flash", contents=[large_content, small_content], config=create_test_config(), ) - (event,) = events - invoke_span = event["spans"][0] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["data"] + (event,) = events + invoke_span = event["spans"][0] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["data"] - messages_data = invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + messages_data = invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] assert isinstance(messages_data, str) parsed_messages = json.loads(messages_data) @@ -2574,21 +2551,16 @@ def test_generate_content_with_inline_data( assert messages[0]["content"][1]["content"] == BLOB_DATA_SUBSTITUTE -@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_generate_content_with_function_response( - sentry_init, - capture_events, - capture_items, - mock_genai_client, - stream_gen_ai_spans, + sentry_init, capture_events, mock_genai_client ): """Test generate_content with function_response (tool result).""" sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - stream_gen_ai_spans=stream_gen_ai_spans, ) + events = capture_events() mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -2614,36 +2586,18 @@ def test_generate_content_with_function_response( ), ] - if stream_gen_ai_spans: - items = capture_items("span") - - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ), start_transaction(name="google_genai"): + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ): + with start_transaction(name="google_genai"): mock_genai_client.models.generate_content( model="gemini-1.5-flash", contents=contents, config=create_test_config() ) - invoke_span = 
next(item.payload for item in items if item.type == "span") - - messages = json.loads( - invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - ) - else: - events = capture_events() - - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ), start_transaction(name="google_genai"): - mock_genai_client.models.generate_content( - model="gemini-1.5-flash", contents=contents, config=create_test_config() - ) - - (event,) = events - invoke_span = event["spans"][0] - - messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + (event,) = events + invoke_span = event["spans"][0] + messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(messages) == 1 # First message is user message assert messages[0]["role"] == "tool" @@ -2652,21 +2606,16 @@ def test_generate_content_with_function_response( assert messages[0]["content"]["output"] == "Sunny, 72F" -@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_generate_content_with_mixed_string_and_content( - sentry_init, - capture_events, - capture_items, - mock_genai_client, - stream_gen_ai_spans, + sentry_init, capture_events, mock_genai_client ): """Test generate_content with mixed string and Content objects in list.""" sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - stream_gen_ai_spans=stream_gen_ai_spans, ) + events = capture_events() mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -2683,36 +2632,18 @@ def test_generate_content_with_mixed_string_and_content( ), ] - if stream_gen_ai_spans: - items = capture_items("span") - - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ), start_transaction(name="google_genai"): + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ): + with 
start_transaction(name="google_genai"): mock_genai_client.models.generate_content( model="gemini-1.5-flash", contents=contents, config=create_test_config() ) - invoke_span = next(item.payload for item in items if item.type == "span") - - messages = json.loads( - invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - ) - else: - events = capture_events() - - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ), start_transaction(name="google_genai"): - mock_genai_client.models.generate_content( - model="gemini-1.5-flash", contents=contents, config=create_test_config() - ) - - (event,) = events - invoke_span = event["spans"][0] - - messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + (event,) = events + invoke_span = event["spans"][0] + messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(messages) == 1 # User message assert messages[0]["role"] == "user" @@ -2775,13 +2706,8 @@ def test_generate_content_with_part_object_directly( assert messages[0]["content"] == [{"text": "Direct Part object", "type": "text"}] -@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_generate_content_with_list_of_dicts( - sentry_init, - capture_events, - capture_items, - mock_genai_client, - stream_gen_ai_spans, + sentry_init, capture_events, mock_genai_client ): """ Test generate_content with list of dict format inputs. 
@@ -2794,8 +2720,8 @@ def test_generate_content_with_list_of_dicts( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - stream_gen_ai_spans=stream_gen_ai_spans, ) + events = capture_events() mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -2806,36 +2732,18 @@ def test_generate_content_with_list_of_dicts( {"role": "user", "parts": [{"text": "Second user message"}]}, ] - if stream_gen_ai_spans: - items = capture_items("span") - - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ), start_transaction(name="google_genai"): + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ): + with start_transaction(name="google_genai"): mock_genai_client.models.generate_content( model="gemini-1.5-flash", contents=contents, config=create_test_config() ) - invoke_span = next(item.payload for item in items if item.type == "span") - - messages = json.loads( - invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - ) - else: - events = capture_events() - - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ), start_transaction(name="google_genai"): - mock_genai_client.models.generate_content( - model="gemini-1.5-flash", contents=contents, config=create_test_config() - ) - - (event,) = events - invoke_span = event["spans"][0] - - messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + (event,) = events + invoke_span = event["spans"][0] + messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(messages) == 1 assert messages[0]["role"] == "user" assert messages[0]["content"] == [{"text": "Second user message", "type": "text"}] diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index 7a39f74ffc..3a2ef76a5a 100644 --- 
a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -2991,13 +2991,7 @@ def test_langchain_message_role_normalization_units(): assert normalized[5] == "string message" # String message unchanged -@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) -def test_langchain_message_truncation( - sentry_init, - capture_events, - capture_items, - stream_gen_ai_spans, -): +def test_langchain_message_truncation(sentry_init, capture_events): """Test that large messages are truncated properly in Langchain integration.""" from langchain_core.outputs import LLMResult, Generation @@ -3005,8 +2999,8 @@ def test_langchain_message_truncation( integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - stream_gen_ai_spans=stream_gen_ai_spans, ) + events = capture_events() callback = SentryLangchainCallback(max_span_map_size=100, include_prompts=True) @@ -3024,101 +3018,48 @@ def test_langchain_message_truncation( "small message 5", ] - if stream_gen_ai_spans: - items = capture_items("transaction", "span") - - with start_transaction(): - callback.on_llm_start( - serialized=serialized, - prompts=prompts, - run_id=run_id, - name="my_pipeline", - invocation_params={ - "temperature": 0.7, - "max_tokens": 100, - "model": "gpt-3.5-turbo", - }, - ) - - response = LLMResult( - generations=[[Generation(text="The response")]], - llm_output={ - "token_usage": { - "total_tokens": 25, - "prompt_tokens": 10, - "completion_tokens": 15, - } - }, - ) - callback.on_llm_end(response=response, run_id=run_id) - - tx = next(item.payload for item in items if item.type == "transaction") - assert tx["type"] == "transaction" - - spans = [item.payload for item in items if item.type == "span"] - llm_spans = [ - span - for span in spans - if span["attributes"].get("sentry.op") == "gen_ai.text_completion" - ] - - assert len(llm_spans) > 0 - - llm_span = llm_spans[0] - - assert 
llm_span["attributes"]["gen_ai.operation.name"] == "text_completion" - assert llm_span["attributes"][SPANDATA.GEN_AI_FUNCTION_ID] == "my_pipeline" - - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in llm_span["attributes"] - messages_data = llm_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - else: - events = capture_events() - - with start_transaction(): - callback.on_llm_start( - serialized=serialized, - prompts=prompts, - run_id=run_id, - name="my_pipeline", - invocation_params={ - "temperature": 0.7, - "max_tokens": 100, - "model": "gpt-3.5-turbo", - }, - ) - - response = LLMResult( - generations=[[Generation(text="The response")]], - llm_output={ - "token_usage": { - "total_tokens": 25, - "prompt_tokens": 10, - "completion_tokens": 15, - } - }, - ) - callback.on_llm_end(response=response, run_id=run_id) - - assert len(events) > 0 - tx = events[0] - assert tx["type"] == "transaction" - - llm_spans = [ - span - for span in tx.get("spans", []) - if span.get("op") == "gen_ai.text_completion" - ] + with start_transaction(): + callback.on_llm_start( + serialized=serialized, + prompts=prompts, + run_id=run_id, + name="my_pipeline", + invocation_params={ + "temperature": 0.7, + "max_tokens": 100, + "model": "gpt-3.5-turbo", + }, + ) - assert len(llm_spans) > 0 + response = LLMResult( + generations=[[Generation(text="The response")]], + llm_output={ + "token_usage": { + "total_tokens": 25, + "prompt_tokens": 10, + "completion_tokens": 15, + } + }, + ) + callback.on_llm_end(response=response, run_id=run_id) - llm_span = llm_spans[0] + assert len(events) > 0 + tx = events[0] + assert tx["type"] == "transaction" - assert llm_span["data"]["gen_ai.operation.name"] == "text_completion" - assert llm_span["data"][SPANDATA.GEN_AI_FUNCTION_ID] == "my_pipeline" + llm_spans = [ + span + for span in tx.get("spans", []) + if span.get("op") == "gen_ai.text_completion" + ] + assert len(llm_spans) > 0 - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in llm_span["data"] - messages_data = 
llm_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + llm_span = llm_spans[0] + assert llm_span["data"]["gen_ai.operation.name"] == "text_completion" + assert llm_span["data"][SPANDATA.GEN_AI_FUNCTION_ID] == "my_pipeline" + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in llm_span["data"] + messages_data = llm_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] assert isinstance(messages_data, str) parsed_messages = json.loads(messages_data) diff --git a/tests/integrations/langgraph/test_langgraph.py b/tests/integrations/langgraph/test_langgraph.py index 68d592bd1d..b8554f2f60 100644 --- a/tests/integrations/langgraph/test_langgraph.py +++ b/tests/integrations/langgraph/test_langgraph.py @@ -1988,13 +1988,7 @@ def __init__(self, content, message_type="human"): assert "ai" not in roles -@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) -def test_langgraph_message_truncation( - sentry_init, - capture_events, - capture_items, - stream_gen_ai_spans, -): +def test_langgraph_message_truncation(sentry_init, capture_events): """Test that large messages are truncated properly in Langgraph integration.""" import json @@ -2002,8 +1996,8 @@ def test_langgraph_message_truncation( integrations=[LanggraphIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - stream_gen_ai_spans=stream_gen_ai_spans, ) + events = capture_events() large_content = ( "This is a very long message that will exceed our size limits. 
" * 1000 @@ -2023,66 +2017,28 @@ def test_langgraph_message_truncation( def original_invoke(self, *args, **kwargs): return {"messages": args[0].get("messages", [])} - if stream_gen_ai_spans: - items = capture_items("transaction", "span") + with start_transaction(): + wrapped_invoke = _wrap_pregel_invoke(original_invoke) + result = wrapped_invoke(pregel, test_state) - with start_transaction(): - wrapped_invoke = _wrap_pregel_invoke(original_invoke) - result = wrapped_invoke(pregel, test_state) - - assert result is not None - - spans = [item.payload for item in items if item.type == "span"] - invoke_spans = [ - span - for span in spans - if span["attributes"].get("sentry.op") == OP.GEN_AI_INVOKE_AGENT - ] - - assert len(invoke_spans) > 0 - - invoke_span = invoke_spans[0] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["attributes"] - - messages_data = invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - - assert isinstance(messages_data, str) - - parsed_messages = json.loads(messages_data) - assert isinstance(parsed_messages, list) - assert len(parsed_messages) == 1 - assert "small message 5" in str(parsed_messages[0]) - (tx,) = (item.payload for item in items if item.type == "transaction") - else: - events = capture_events() - - with start_transaction(): - wrapped_invoke = _wrap_pregel_invoke(original_invoke) - result = wrapped_invoke(pregel, test_state) - - assert result is not None - - assert len(events) > 0 - tx = events[0] - assert tx["type"] == "transaction" - - invoke_spans = [ - span - for span in tx.get("spans", []) - if span.get("op") == OP.GEN_AI_INVOKE_AGENT - ] - - assert len(invoke_spans) > 0 + assert result is not None + assert len(events) > 0 + tx = events[0] + assert tx["type"] == "transaction" - invoke_span = invoke_spans[0] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["data"] + invoke_spans = [ + span for span in tx.get("spans", []) if span.get("op") == OP.GEN_AI_INVOKE_AGENT + ] + assert len(invoke_spans) > 0 - 
messages_data = invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert isinstance(messages_data, str) + invoke_span = invoke_spans[0] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["data"] - parsed_messages = json.loads(messages_data) - assert isinstance(parsed_messages, list) - assert len(parsed_messages) == 1 - assert "small message 5" in str(parsed_messages[0]) + messages_data = invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert isinstance(messages_data, str) + parsed_messages = json.loads(messages_data) + assert isinstance(parsed_messages, list) + assert len(parsed_messages) == 1 + assert "small message 5" in str(parsed_messages[0]) assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5 diff --git a/tests/integrations/litellm/test_litellm.py b/tests/integrations/litellm/test_litellm.py index 76aea9093d..703ae67b1a 100644 --- a/tests/integrations/litellm/test_litellm.py +++ b/tests/integrations/litellm/test_litellm.py @@ -2349,20 +2349,14 @@ def test_integration_setup(sentry_init): assert _failure_callback in (litellm.failure_callback or []) -@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) -def test_litellm_message_truncation( - sentry_init, - capture_events, - capture_items, - stream_gen_ai_spans, -): +def test_litellm_message_truncation(sentry_init, capture_events): """Test that large messages are truncated properly in LiteLLM integration.""" sentry_init( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - stream_gen_ai_spans=stream_gen_ai_spans, ) + events = capture_events() large_content = ( "This is a very long message that will exceed our size limits. 
" * 1000 @@ -2376,78 +2370,39 @@ def test_litellm_message_truncation( ] mock_response = MockCompletionResponse() - if stream_gen_ai_spans: - items = capture_items("transaction", "span") - - with start_transaction(name="litellm test"): - kwargs = { - "model": "gpt-3.5-turbo", - "messages": messages, - } - - _input_callback(kwargs) - _success_callback( - kwargs, - mock_response, - datetime.now(), - datetime.now(), - ) - - spans = [item.payload for item in items if item.type == "span"] - chat_spans = [ - span - for span in spans - if span["attributes"].get("sentry.op") == OP.GEN_AI_CHAT - ] - assert len(chat_spans) > 0 - - chat_span = chat_spans[0] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["attributes"] - - messages_data = chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert isinstance(messages_data, str) - - parsed_messages = json.loads(messages_data) - assert isinstance(parsed_messages, list) - assert len(parsed_messages) == 1 - assert "small message 5" in str(parsed_messages[0]) - tx = next(item.payload for item in items if item.type == "transaction") - else: - events = capture_events() - - with start_transaction(name="litellm test"): - kwargs = { - "model": "gpt-3.5-turbo", - "messages": messages, - } + with start_transaction(name="litellm test"): + kwargs = { + "model": "gpt-3.5-turbo", + "messages": messages, + } - _input_callback(kwargs) - _success_callback( - kwargs, - mock_response, - datetime.now(), - datetime.now(), - ) + _input_callback(kwargs) + _success_callback( + kwargs, + mock_response, + datetime.now(), + datetime.now(), + ) - assert len(events) > 0 - tx = events[0] - assert tx["type"] == "transaction" + assert len(events) > 0 + tx = events[0] + assert tx["type"] == "transaction" - chat_spans = [ - span for span in tx.get("spans", []) if span.get("op") == OP.GEN_AI_CHAT - ] - assert len(chat_spans) > 0 + chat_spans = [ + span for span in tx.get("spans", []) if span.get("op") == OP.GEN_AI_CHAT + ] + assert len(chat_spans) > 
0 - chat_span = chat_spans[0] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["data"] + chat_span = chat_spans[0] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["data"] - messages_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert isinstance(messages_data, str) + messages_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert isinstance(messages_data, str) - parsed_messages = json.loads(messages_data) - assert isinstance(parsed_messages, list) - assert len(parsed_messages) == 1 - assert "small message 5" in str(parsed_messages[0]) + parsed_messages = json.loads(messages_data) + assert isinstance(parsed_messages, list) + assert len(parsed_messages) == 1 + assert "small message 5" in str(parsed_messages[0]) assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5